# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import (
    CollectionTypeError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DatasetTypeExpressionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry

class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
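
    # A minimal sketch of how a subclass might override the managers above;
    # the manager class path shown is illustrative and should be checked
    # against the daf_butler distribution actually in use:
    #
    #     class SynthIntKeyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections."
    #             "synthIntKey.SynthIntKeyCollectionManager"
    #         )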

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
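
    # A hedged sketch of a concrete implementation, assuming a SQLite-backed
    # registry and that a factory such as `SqlRegistry.createFromConfig` is
    # available (verify the actual factory API in the daf_butler version
    # being tested):
    #
    #     def makeRegistry(self, share_repo_with=None):
    #         if share_repo_with is not None:
    #             return None  # in-memory DBs cannot be shared
    #         config = self.makeRegistryConfig()
    #         config["db"] = "sqlite://"
    #         return SqlRegistry.createFromConfig(config)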

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
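
    # A hedged usage sketch: the helper enforces the lazy-results contract
    # that ``list(results)``, ``results.count()``, and ``results.any()``
    # agree with each other.  ``expected_data_ids`` is a placeholder name:
    #
    #     results = registry.queryDataIds(["detector"], instrument="Cam1")
    #     self.checkQueryResults(results, expected_data_ids)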

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but it
        # looks like it is actually much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
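
    # The long-IN-clause assertions above exist because the opaque-data
    # layer splits large ``IN`` lists into batches rather than binding one
    # SQL parameter per value.  A generic sketch of that technique with
    # plain SQLAlchemy (not the registry's actual implementation):
    #
    #     def fetch_in_batches(conn, table, column, values, batch_size=1000):
    #         results = []
    #         unique = sorted(set(values))
    #         for i in range(0, len(unique), batch_size):
    #             chunk = unique[i : i + batch_size]
    #             results.extend(
    #                 conn.execute(table.select().where(column.in_(chunk)))
    #             )
    #         return results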

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

        # Test some basic queryDatasetTypes functionality
        missing: list[str] = []
        types = registry.queryDatasetTypes(["te*", "notarealdatasettype"], missing=missing)
        self.assertCountEqual([dt.name for dt in types], ["test", "testNoneTemplate"])
        self.assertEqual(missing, ["notarealdatasettype"])

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)
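
    # Why the coercion matters: ``np.int64`` is not a subclass of Python's
    # ``int``, but it is registered as ``numbers.Integral``, so id-like
    # values can be normalized that way.  A hedged sketch of the idea (not
    # the registry's actual code path):
    #
    #     import numbers
    #
    #     def coerce_data_id_value(value):
    #         if isinstance(value, numbers.Integral):
    #             return int(value)  # np.int64(42) -> 42
    #         return value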

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and a third does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is
            # an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])
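
    # The ``version == 5`` assertion above holds because reproducible IDs
    # are name-based UUIDs (RFC 4122 version 5).  A hedged sketch of the
    # idea -- the real namespace and name layout live inside
    # DatasetIdFactory and are internal details:
    #
    #     import uuid
    #
    #     NS = uuid.UUID(int=0)  # placeholder namespace, not the real one
    #     name = "bias" + str(sorted(dataIdBias1.items()))  # DATAID_TYPE
    #     name_with_run = name + "run2"  # DATAID_TYPE_RUN also mixes in run
    #     assert uuid.uuid5(NS, name).version == 5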

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry.  This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but
            # before inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section
                # and is waiting for us to release it.  Start the other
                # thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1
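
    # The "locking via database row locks" exercised above is the standard
    # SELECT ... FOR UPDATE pattern.  A generic SQLAlchemy sketch of the
    # idea, not the collections manager's actual query (``engine``,
    # ``chain_table``, and ``parent_key`` are hypothetical names):
    #
    #     with engine.begin() as conn:
    #         conn.execute(
    #             sqlalchemy.select(chain_table)
    #             .where(chain_table.c.parent == parent_key)
    #             .with_for_update()
    #         )
    #         # Other transactions that try to lock the same rows now block
    #         # until this transaction commits or rolls back.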

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)
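
    # ``transaction(savepoint=True)`` corresponds to SQL SAVEPOINT
    # semantics.  A hedged sketch of the equivalent pattern in plain
    # SQLAlchemy (``engine``, ``outer_insert``, and ``inner_insert`` are
    # hypothetical placeholders):
    #
    #     with engine.begin() as conn:  # BEGIN ... COMMIT
    #         conn.execute(outer_insert)  # kept
    #         nested = conn.begin_nested()  # SAVEPOINT
    #         try:
    #             conn.execute(inner_insert)  # rolled back below
    #         except sqlalchemy.exc.IntegrityError:
    #             nested.rollback()  # ROLLBACK TO SAVEPOINT; outer work kept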

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression that excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1334 def testSpatialJoin(self):
1335 """Test queries that involve spatial overlap joins."""
1336 registry = self.makeRegistry()
1337 self.loadData(registry, "hsc-rc2-subset.yaml")
1339 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1340 # the TopologicalFamily they belong to. We'll relate all elements in
1341 # each family to all of the elements in each other family.
1342 families = defaultdict(set)
1343 # Dictionary of {element.name: {dataId: region}}.
1344 regions = {}
1345 for element in registry.dimensions.database_elements:
1346 if element.spatial is not None:
1347 families[element.spatial.name].add(element)
1348 regions[element.name] = {
1349 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1350 }
1352 # If this check fails, it's not necessarily a problem - it may just be
1353 # a reasonable change to the default dimension definitions - but the
1354 # test below depends on there being more than one family to do anything
1355 # useful.
1356 self.assertEqual(len(families), 2)
1358 # Overlap DatabaseDimensionElements with each other.
1359 for family1, family2 in itertools.combinations(families, 2):
1360 for element1, element2 in itertools.product(families[family1], families[family2]):
1361 dimensions = element1.minimal_group | element2.minimal_group
1362 # Construct expected set of overlapping data IDs via a
1363 # brute-force comparison of the regions we've already fetched.
1364 expected = {
1365 DataCoordinate.standardize(
1366 {**dataId1.required, **dataId2.required}, dimensions=dimensions
1367 )
1368 for (dataId1, region1), (dataId2, region2) in itertools.product(
1369 regions[element1.name].items(), regions[element2.name].items()
1370 )
1371 if not region1.isDisjointFrom(region2)
1372 }
1373 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1374 queried = set(registry.queryDataIds(dimensions))
1375 self.assertEqual(expected, queried)
1377 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1378 commonSkyPix = registry.dimensions.commonSkyPix
1379 for elementName, these_regions in regions.items():
1380 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
1381 expected = set()
1382 for dataId, region in these_regions.items():
1383 for begin, end in commonSkyPix.pixelization.envelope(region):
1384 expected.update(
1385 DataCoordinate.standardize(
1386 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
1387 )
1388 for index in range(begin, end)
1389 )
1390 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1391 queried = set(registry.queryDataIds(dimensions))
1392 self.assertEqual(expected, queried)
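# Self-contained sketch (illustrative, not part of the original test) of
# the sphgeom primitives the brute-force checks above rely on; trixel
# index 49152 is assumed to be a valid HTM level-6 index.
_htm6_sketch = lsst.sphgeom.HtmPixelization(6)
assert _htm6_sketch.universe().contains(lsst.sphgeom.RangeSet(49152))
_trixel = _htm6_sketch.triangle(49152)
# A region is never disjoint from itself, and envelope() returns the index
# ranges of pixels that may intersect a region, so it must cover the
# trixel's own index.
assert not _trixel.isDisjointFrom(_trixel)
assert _htm6_sketch.envelope(_trixel).contains(lsst.sphgeom.RangeSet(49152))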
1394 def testAbstractQuery(self):
1395 """Test that we can run a query that just lists the known
1396 bands. This is tricky because band is
1397 backed by a query against physical_filter.
1398 """
1399 registry = self.makeRegistry()
1400 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1401 registry.insertDimensionData(
1402 "physical_filter",
1403 dict(instrument="DummyCam", name="dummy_i", band="i"),
1404 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1405 dict(instrument="DummyCam", name="dummy_r", band="r"),
1406 )
1407 rows = registry.queryDataIds(["band"]).toSet()
1408 self.assertCountEqual(
1409 rows,
1410 [
1411 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1412 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1413 ],
1414 )
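# Pure-Python sketch (illustrative) of why the two "i" physical filters
# above collapse to a single band row: the band query is a projection of
# physical_filter with duplicates removed.
_filters = [("dummy_i", "i"), ("dummy_i2", "i"), ("dummy_r", "r")]
assert sorted({band for _, band in _filters}) == ["i", "r"]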
1416 def testAttributeManager(self):
1417 """Test basic functionality of attribute manager."""
1418 # Number of attribute records with schema versions in a fresh database:
1419 # 6 managers with 2 records per manager, plus the config for dimensions.
1420 VERSION_COUNT = 6 * 2 + 1
1422 registry = self.makeRegistry()
1423 attributes = registry._managers.attributes
1425 # check what get() returns for non-existing key
1426 self.assertIsNone(attributes.get("attr"))
1427 self.assertEqual(attributes.get("attr", ""), "")
1428 self.assertEqual(attributes.get("attr", "Value"), "Value")
1429 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
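# (attributes.get follows dict.get semantics, as a quick sketch shows:
# a missing key yields the default, or None if no default is given.)
_d = {}
assert _d.get("attr") is None and _d.get("attr", "Value") == "Value"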
1431 # cannot store empty key or value
1432 with self.assertRaises(ValueError):
1433 attributes.set("", "value")
1434 with self.assertRaises(ValueError):
1435 attributes.set("attr", "")
1437 # set value of non-existing key
1438 attributes.set("attr", "value")
1439 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1440 self.assertEqual(attributes.get("attr"), "value")
1442 # update value of existing key
1443 with self.assertRaises(ButlerAttributeExistsError):
1444 attributes.set("attr", "value2")
1446 attributes.set("attr", "value2", force=True)
1447 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1448 self.assertEqual(attributes.get("attr"), "value2")
1450 # delete existing key
1451 self.assertTrue(attributes.delete("attr"))
1452 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1454 # delete non-existing key
1455 self.assertFalse(attributes.delete("non-attr"))
1457 # store a bunch of keys and get the list back
1458 data = [
1459 ("version.core", "1.2.3"),
1460 ("version.dimensions", "3.2.1"),
1461 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1462 ]
1463 for key, value in data:
1464 attributes.set(key, value)
1465 items = dict(attributes.items())
1466 for key, value in data:
1467 self.assertEqual(items[key], value)
1469 def testQueryDatasetsDeduplication(self):
1470 """Test that the findFirst option to queryDatasets selects datasets
1471 from collections in the order given.
1472 """
1473 registry = self.makeRegistry()
1474 self.loadData(registry, "base.yaml")
1475 self.loadData(registry, "datasets.yaml")
1476 self.assertCountEqual(
1477 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1478 [
1479 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1480 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1481 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1482 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1483 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1484 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1485 ],
1486 )
1487 self.assertCountEqual(
1488 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1489 [
1490 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1491 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1492 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1493 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1494 ],
1495 )
1496 self.assertCountEqual(
1497 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1498 [
1499 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1500 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1501 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1502 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1503 ],
1504 )
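# Pure-Python sketch (illustrative) of the find-first rule exercised
# above: for each data ID, keep the dataset from the earliest collection
# in the search order that contains one.
_per_collection = {
    "imported_g": {1: "bias-g-1", 2: "bias-g-2", 3: "bias-g-3"},
    "imported_r": {2: "bias-r-2", 3: "bias-r-3", 4: "bias-r-4"},
}
_find_first: dict[int, str] = {}
for _coll in ["imported_g", "imported_r"]:
    for _det, _ref in _per_collection[_coll].items():
        _find_first.setdefault(_det, _ref)
assert _find_first == {1: "bias-g-1", 2: "bias-g-2", 3: "bias-g-3", 4: "bias-r-4"}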
1506 def testQueryResults(self):
1507 """Test querying for data IDs and then manipulating the QueryResults
1508 object returned to perform other queries.
1509 """
1510 registry = self.makeRegistry()
1511 self.loadData(registry, "base.yaml")
1512 self.loadData(registry, "datasets.yaml")
1513 bias = registry.getDatasetType("bias")
1514 flat = registry.getDatasetType("flat")
1515 # Obtain expected results from methods other than those we're testing
1516 # here. That includes:
1517 # - the dimensions of the data IDs we want to query:
1518 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1519 # - the dimensions of some other data IDs we'll extract from that:
1520 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1521 # - the data IDs we expect to obtain from the first queries:
1522 expectedDataIds = DataCoordinateSet(
1523 {
1524 DataCoordinate.standardize(
1525 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1526 )
1527 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1528 },
1529 dimensions=expected_dimensions,
1530 hasFull=False,
1531 hasRecords=False,
1532 )
1533 # - the flat datasets we expect to find from those data IDs, in just
1534 # one collection (so deduplication is irrelevant):
1535 expectedFlats = [
1536 registry.findDataset(
1537 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1538 ),
1539 registry.findDataset(
1540 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1541 ),
1542 registry.findDataset(
1543 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1544 ),
1545 ]
1546 # - the data IDs we expect to extract from that:
1547 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1548 # - the bias datasets we expect to find from those data IDs, after we
1549 # subset-out the physical_filter dimension, both with duplicates:
1550 expectedAllBiases = [
1551 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1552 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1553 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1554 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1555 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1556 ]
1557 # - ...and without duplicates:
1558 expectedDeduplicatedBiases = [
1559 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1560 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1561 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1562 ]
1563 # Test against those expected results, using a "lazy" query for the
1564 # data IDs (which re-executes that query each time we use it to do
1565 # something new).
1566 dataIds = registry.queryDataIds(
1567 ["detector", "physical_filter"],
1568 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1569 instrument="Cam1",
1570 )
1571 self.assertEqual(dataIds.dimensions, expected_dimensions)
1572 self.assertEqual(dataIds.toSet(), expectedDataIds)
1573 self.assertCountEqual(
1574 list(
1575 dataIds.findDatasets(
1576 flat,
1577 collections=["imported_r"],
1578 )
1579 ),
1580 expectedFlats,
1581 )
1582 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1583 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1584 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1585 self.assertCountEqual(
1586 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1587 expectedAllBiases,
1588 )
1589 self.assertCountEqual(
1590 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1591 expectedDeduplicatedBiases,
1592 )
1594 # Searching for a dataset with dimensions we had projected away
1595 # restores those dimensions.
1596 self.assertCountEqual(
1597 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1598 expectedFlats,
1599 )
1601 # Use a named dataset type that does not exist and a dataset type
1602 # object that does not exist.
1603 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1605 # Test both string name and dataset type object.
1606 test_type: str | DatasetType
1607 for test_type, test_type_name in (
1608 (unknown_type, unknown_type.name),
1609 (unknown_type.name, unknown_type.name),
1610 ):
1611 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1612 list(
1613 subsetDataIds.findDatasets(
1614 test_type, collections=["imported_r", "imported_g"], findFirst=True
1615 )
1616 )
1618 # Materialize the bias dataset queries (only) by putting the results
1619 # into temporary tables, then repeat those tests.
1620 with subsetDataIds.findDatasets(
1621 bias, collections=["imported_r", "imported_g"], findFirst=False
1622 ).materialize() as biases:
1623 self.assertCountEqual(list(biases), expectedAllBiases)
1624 with subsetDataIds.findDatasets(
1625 bias, collections=["imported_r", "imported_g"], findFirst=True
1626 ).materialize() as biases:
1627 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1628 # Materialize the data ID subset query, but not the dataset queries.
1629 with subsetDataIds.materialize() as subsetDataIds:
1630 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1631 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1632 self.assertCountEqual(
1633 list(
1634 subsetDataIds.findDatasets(
1635 bias, collections=["imported_r", "imported_g"], findFirst=False
1636 )
1637 ),
1638 expectedAllBiases,
1639 )
1640 self.assertCountEqual(
1641 list(
1642 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1643 ),
1644 expectedDeduplicatedBiases,
1645 )
1646 # Materialize the dataset queries, too.
1647 with subsetDataIds.findDatasets(
1648 bias, collections=["imported_r", "imported_g"], findFirst=False
1649 ).materialize() as biases:
1650 self.assertCountEqual(list(biases), expectedAllBiases)
1651 with subsetDataIds.findDatasets(
1652 bias, collections=["imported_r", "imported_g"], findFirst=True
1653 ).materialize() as biases:
1654 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1655 # Materialize the original query, but none of the follow-up queries.
1656 with dataIds.materialize() as dataIds:
1657 self.assertEqual(dataIds.dimensions, expected_dimensions)
1658 self.assertEqual(dataIds.toSet(), expectedDataIds)
1659 self.assertCountEqual(
1660 list(
1661 dataIds.findDatasets(
1662 flat,
1663 collections=["imported_r"],
1664 )
1665 ),
1666 expectedFlats,
1667 )
1668 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1669 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1670 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1671 self.assertCountEqual(
1672 list(
1673 subsetDataIds.findDatasets(
1674 bias, collections=["imported_r", "imported_g"], findFirst=False
1675 )
1676 ),
1677 expectedAllBiases,
1678 )
1679 self.assertCountEqual(
1680 list(
1681 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1682 ),
1683 expectedDeduplicatedBiases,
1684 )
1685 # Materialize just the bias dataset queries.
1686 with subsetDataIds.findDatasets(
1687 bias, collections=["imported_r", "imported_g"], findFirst=False
1688 ).materialize() as biases:
1689 self.assertCountEqual(list(biases), expectedAllBiases)
1690 with subsetDataIds.findDatasets(
1691 bias, collections=["imported_r", "imported_g"], findFirst=True
1692 ).materialize() as biases:
1693 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1694 # Materialize the subset data ID query, but not the dataset
1695 # queries.
1696 with subsetDataIds.materialize() as subsetDataIds:
1697 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1698 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1699 self.assertCountEqual(
1700 list(
1701 subsetDataIds.findDatasets(
1702 bias, collections=["imported_r", "imported_g"], findFirst=False
1703 )
1704 ),
1705 expectedAllBiases,
1706 )
1707 self.assertCountEqual(
1708 list(
1709 subsetDataIds.findDatasets(
1710 bias, collections=["imported_r", "imported_g"], findFirst=True
1711 )
1712 ),
1713 expectedDeduplicatedBiases,
1714 )
1715 # Materialize the bias dataset queries, too, so now we're
1716 # materializing every single step.
1717 with subsetDataIds.findDatasets(
1718 bias, collections=["imported_r", "imported_g"], findFirst=False
1719 ).materialize() as biases:
1720 self.assertCountEqual(list(biases), expectedAllBiases)
1721 with subsetDataIds.findDatasets(
1722 bias, collections=["imported_r", "imported_g"], findFirst=True
1723 ).materialize() as biases:
1724 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
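# Note (illustrative): materialize() stores the current result rows in a
# temporary table and yields a results object backed by it, so the
# follow-up findDatasets()/subset() calls above reuse those rows instead
# of re-executing the original "lazy" query; the context manager cleans
# the temporary table up on exit.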
1726 def testStorageClassPropagation(self):
1727 """Test that queries for datasets respect the storage class passed in
1728 as part of a full dataset type.
1729 """
1730 registry = self.makeRegistry()
1731 self.loadData(registry, "base.yaml")
1732 dataset_type_in_registry = DatasetType(
1733 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1734 )
1735 registry.registerDatasetType(dataset_type_in_registry)
1736 run = "run1"
1737 registry.registerRun(run)
1738 (inserted_ref,) = registry.insertDatasets(
1739 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1740 )
1741 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1742 query_dataset_type = DatasetType(
1743 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1744 )
1745 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1746 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1747 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1748 (query_datasets_ref,) = query_datasets_result
1749 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1750 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1751 query_dataset_type, collections=[run]
1752 )
1753 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1754 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1755 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1756 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1757 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1758 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1759 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
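# Note (illustrative): DatasetType equality includes the storage class, so
# dataset_type_in_registry != query_dataset_type above even though name
# and dimensions match; the contract being tested is that the storage
# class supplied by the caller wins in every query result.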
1761 def testEmptyDimensionsQueries(self):
1762 """Test Query and QueryResults objects in the case where there are no
1763 dimensions.
1764 """
1765 # Set up test data: one dataset type, two runs, one dataset in each.
1766 registry = self.makeRegistry()
1767 self.loadData(registry, "base.yaml")
1768 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1769 registry.registerDatasetType(schema)
1770 dataId = DataCoordinate.make_empty(registry.dimensions)
1771 run1 = "run1"
1772 run2 = "run2"
1773 registry.registerRun(run1)
1774 registry.registerRun(run2)
1775 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1776 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1777 # Query directly for both of the datasets, and each one, one at a time.
1778 self.checkQueryResults(
1779 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1780 )
1781 self.checkQueryResults(
1782 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1783 [dataset1],
1784 )
1785 self.checkQueryResults(
1786 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1787 [dataset2],
1788 )
1789 # Query for data IDs with no dimensions.
1790 dataIds = registry.queryDataIds([])
1791 self.checkQueryResults(dataIds, [dataId])
1792 # Use queried data IDs to find the datasets.
1793 self.checkQueryResults(
1794 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1795 [dataset1, dataset2],
1796 )
1797 self.checkQueryResults(
1798 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1799 [dataset1],
1800 )
1801 self.checkQueryResults(
1802 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1803 [dataset2],
1804 )
1805 # Now materialize the data ID query results and repeat those tests.
1806 with dataIds.materialize() as dataIds:
1807 self.checkQueryResults(dataIds, [dataId])
1808 self.checkQueryResults(
1809 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1810 [dataset1],
1811 )
1812 self.checkQueryResults(
1813 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1814 [dataset2],
1815 )
1816 # Query for non-empty data IDs, then subset that to get the empty one.
1817 # Repeat the above tests starting from that.
1818 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1819 self.checkQueryResults(dataIds, [dataId])
1820 self.checkQueryResults(
1821 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1822 [dataset1, dataset2],
1823 )
1824 self.checkQueryResults(
1825 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1826 [dataset1],
1827 )
1828 self.checkQueryResults(
1829 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1830 [dataset2],
1831 )
1832 with dataIds.materialize() as dataIds:
1833 self.checkQueryResults(dataIds, [dataId])
1834 self.checkQueryResults(
1835 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1836 [dataset1, dataset2],
1837 )
1838 self.checkQueryResults(
1839 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1840 [dataset1],
1841 )
1842 self.checkQueryResults(
1843 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1844 [dataset2],
1845 )
1846 # Query for non-empty data IDs, then materialize, then subset to get
1847 # the empty one. Repeat again.
1848 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1849 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1850 self.checkQueryResults(dataIds, [dataId])
1851 self.checkQueryResults(
1852 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1853 [dataset1, dataset2],
1854 )
1855 self.checkQueryResults(
1856 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1857 [dataset1],
1858 )
1859 self.checkQueryResults(
1860 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1861 [dataset2],
1862 )
1863 with dataIds.materialize() as dataIds:
1864 self.checkQueryResults(dataIds, [dataId])
1865 self.checkQueryResults(
1866 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1867 [dataset1, dataset2],
1868 )
1869 self.checkQueryResults(
1870 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1871 [dataset1],
1872 )
1873 self.checkQueryResults(
1874 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1875 [dataset2],
1876 )
1877 # Repeat the materialization tests with a dimension element that isn't
1878 # cached, so there's no way we can know when building the query whether
1879 # there are any rows or not (there aren't).
1880 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1881 with dataIds.materialize() as dataIds:
1882 self.checkQueryResults(dataIds, [])
1883 self.checkQueryResults(
1884 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1885 )
1886 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1887 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1888 # Query for non-empty data IDs with a constraint on an empty-data-ID
1889 # dataset that exists.
1890 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1891 self.checkQueryResults(
1892 dataIds.subset(unique=True),
1893 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1894 )
1895 # Again query for non-empty data IDs with a constraint on empty-data-ID
1896 # datasets, but when the datasets don't exist. We delete the existing
1897 # dataset and query just that collection rather than creating a new
1898 # empty collection because this is a bit less likely for our build-time
1899 # logic to shortcut-out (via the collection summaries), and such a
1900 # shortcut would make this test a bit more trivial than we'd like.
1901 registry.removeDatasets([dataset2])
1902 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1903 self.checkQueryResults(dataIds, [])
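# Note (illustrative): there is exactly one empty data ID per dimension
# universe, so every query over empty dimensions above can only yield that
# single coordinate or nothing at all, regardless of how the query was
# built, subset, or materialized.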
1905 def testDimensionDataModifications(self):
1906 """Test that modifying dimension records via:
1907 syncDimensionData(..., update=True) and
1908 insertDimensionData(..., replace=True) works as expected, even in the
1909 presence of datasets using those dimensions and spatial overlap
1910 relationships.
1911 """
1913 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1914 """Unpack a sphgeom.RangeSet into the integers it contains."""
1915 for begin, end in ranges:
1916 yield from range(begin, end)
1918 def _range_set_hull(
1919 ranges: lsst.sphgeom.RangeSet,
1920 pixelization: lsst.sphgeom.HtmPixelization,
1921 ) -> lsst.sphgeom.ConvexPolygon:
1922 """Create a ConvexPolygon hull of the region defined by a set of
1923 HTM pixelization index ranges.
1924 """
1925 points = []
1926 for index in _unpack_range_set(ranges):
1927 points.extend(pixelization.triangle(index).getVertices())
1928 return lsst.sphgeom.ConvexPolygon(points)
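# Quick self-check of the helper above (illustrative): RangeSet(4) covers
# [4, 5), so scaling by 4 gives [16, 20) and unpacking yields four
# consecutive integers.
assert list(_unpack_range_set(lsst.sphgeom.RangeSet(4).scaled(4))) == [16, 17, 18, 19]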
1930 # Use HTM to set up an initial parent region (one arbitrary trixel)
1931 # and four child regions (the trixels within the parent at the next
1932 # level). We'll use the parent as a tract/visit region and the children
1933 # as its patch/visit_detector regions.
1934 registry = self.makeRegistry()
1935 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1936 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1937 index = 12288
1938 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1939 assert htm6.universe().contains(child_ranges_small)
1940 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1941 parent_region_small = lsst.sphgeom.ConvexPolygon(
1942 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1943 )
1944 assert all(parent_region_small.contains(c) for c in child_regions_small)
1945 # Make a larger version of each child region, defined to be the set of
1946 # htm6 trixels that overlap the original's bounding circle. Make a new
1947 # parent that's the convex hull of the new children.
1948 child_regions_large = [
1949 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1950 ]
1951 assert all(
1952 large.contains(small)
1953 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1954 )
1955 parent_region_large = lsst.sphgeom.ConvexPolygon(
1956 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1957 )
1958 assert all(parent_region_large.contains(c) for c in child_regions_large)
1959 assert parent_region_large.contains(parent_region_small)
1960 assert not parent_region_small.contains(parent_region_large)
1961 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1962 # Find some commonSkyPix indices that overlap the large regions but do
1963 # not overlap the small regions. We use commonSkyPix here to make sure the
1964 # real tests later involve what's in the database, not just post-query
1965 # filtering of regions.
1966 child_difference_indices = []
1967 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1968 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1969 assert difference, "if this is empty, we can't test anything useful with these regions"
1970 assert all(
1971 not commonSkyPix.triangle(d).isDisjointFrom(large)
1972 and commonSkyPix.triangle(d).isDisjointFrom(small)
1973 for d in difference
1974 )
1975 child_difference_indices.append(difference)
1976 parent_difference_indices = list(
1977 _unpack_range_set(
1978 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1979 )
1980 )
1981 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1982 assert all(
1983 (
1984 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1985 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1986 )
1987 for d in parent_difference_indices
1988 )
1989 # Now that we've finally got those regions, we'll insert the large ones
1990 # as tract/patch dimension records.
1991 skymap_name = "testing_v1"
1992 registry.insertDimensionData(
1993 "skymap",
1994 {
1995 "name": skymap_name,
1996 "hash": bytes([42]),
1997 "tract_max": 1,
1998 "patch_nx_max": 2,
1999 "patch_ny_max": 2,
2000 },
2001 )
2002 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
2003 registry.insertDimensionData(
2004 "patch",
2005 *[
2006 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2007 for n, c in enumerate(child_regions_large)
2008 ],
2009 )
2010 # Add a dataset that uses these dimensions to make sure that modifying
2011 # them doesn't disrupt foreign keys (need to make sure DB doesn't
2012 # implement insert with replace=True as delete-then-insert).
2013 dataset_type = DatasetType(
2014 "coadd",
2015 dimensions=["tract", "patch"],
2016 universe=registry.dimensions,
2017 storageClass="Exposure",
2018 )
2019 registry.registerDatasetType(dataset_type)
2020 registry.registerCollection("the_run", CollectionType.RUN)
2021 registry.insertDatasets(
2022 dataset_type,
2023 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
2024 run="the_run",
2025 )
2026 # Query for tracts and patches that overlap some "difference" htm9
2027 # pixels; there should be overlaps, because the database has
2028 # the "large" suite of regions.
2029 self.assertEqual(
2030 {0},
2031 {
2032 data_id["tract"]
2033 for data_id in registry.queryDataIds(
2034 ["tract"],
2035 skymap=skymap_name,
2036 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2037 )
2038 },
2039 )
2040 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2041 self.assertIn(
2042 patch_id,
2043 {
2044 data_id["patch"]
2045 for data_id in registry.queryDataIds(
2046 ["patch"],
2047 skymap=skymap_name,
2048 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2049 )
2050 },
2051 )
2052 # Use sync to update the tract region and insert to update the regions
2053 # of the patches, to the "small" suite.
2054 updated = registry.syncDimensionData(
2055 "tract",
2056 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
2057 update=True,
2058 )
2059 self.assertEqual(updated, {"region": parent_region_large})
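# (syncDimensionData(..., update=True) returns a dict of the fields it
# replaced with their *previous* values, hence parent_region_large here.)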
2060 registry.insertDimensionData(
2061 "patch",
2062 *[
2063 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2064 for n, c in enumerate(child_regions_small)
2065 ],
2066 replace=True,
2067 )
2068 # Query again; there now should be no such overlaps, because the
2069 # database has the "small" suite of regions.
2070 self.assertFalse(
2071 set(
2072 registry.queryDataIds(
2073 ["tract"],
2074 skymap=skymap_name,
2075 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2076 )
2077 )
2078 )
2079 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2080 self.assertNotIn(
2081 patch_id,
2082 {
2083 data_id["patch"]
2084 for data_id in registry.queryDataIds(
2085 ["patch"],
2086 skymap=skymap_name,
2087 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2088 )
2089 },
2090 )
2091 # Update back to the large regions and query one more time.
2092 updated = registry.syncDimensionData(
2093 "tract",
2094 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2095 update=True,
2096 )
2097 self.assertEqual(updated, {"region": parent_region_small})
2098 registry.insertDimensionData(
2099 "patch",
2100 *[
2101 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2102 for n, c in enumerate(child_regions_large)
2103 ],
2104 replace=True,
2105 )
2106 self.assertEqual(
2107 {0},
2108 {
2109 data_id["tract"]
2110 for data_id in registry.queryDataIds(
2111 ["tract"],
2112 skymap=skymap_name,
2113 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2114 )
2115 },
2116 )
2117 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2118 self.assertIn(
2119 patch_id,
2120 {
2121 data_id["patch"]
2122 for data_id in registry.queryDataIds(
2123 ["patch"],
2124 skymap=skymap_name,
2125 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2126 )
2127 },
2128 )
2130 def testCalibrationCollections(self):
2131 """Test operations on `~CollectionType.CALIBRATION` collections,
2132 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2133 `SqlRegistry.findDataset`, and
2134 `DataCoordinateQueryResults.findRelatedDatasets`.
2135 """
2136 # Setup - make a Registry, fill it with some datasets in
2137 # non-calibration collections.
2138 registry = self.makeRegistry()
2139 self.loadData(registry, "base.yaml")
2140 self.loadData(registry, "datasets.yaml")
2141 # Set up some timestamps.
2142 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2143 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2144 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2145 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2146 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2147 allTimespans = [
2148 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2149 ]
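# (The second None above is deliberate: it lets combinations() emit the
# fully unbounded Timespan(None, None) as one of the test cases.)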
2150 # Insert some exposure records with timespans between each sequential
2151 # pair of those.
2152 registry.insertDimensionData(
2153 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
2154 )
2155 registry.insertDimensionData(
2156 "group",
2157 {"instrument": "Cam1", "name": "group0"},
2158 {"instrument": "Cam1", "name": "group1"},
2159 {"instrument": "Cam1", "name": "group2"},
2160 {"instrument": "Cam1", "name": "group3"},
2161 )
2162 registry.insertDimensionData(
2163 "exposure",
2164 {
2165 "instrument": "Cam1",
2166 "id": 0,
2167 "group": "group0",
2168 "obs_id": "zero",
2169 "physical_filter": "Cam1-G",
2170 "day_obs": 20200101,
2171 "timespan": Timespan(t1, t2),
2172 },
2173 {
2174 "instrument": "Cam1",
2175 "id": 1,
2176 "group": "group1",
2177 "obs_id": "one",
2178 "physical_filter": "Cam1-G",
2179 "day_obs": 20200101,
2180 "timespan": Timespan(t2, t3),
2181 },
2182 {
2183 "instrument": "Cam1",
2184 "id": 2,
2185 "group": "group2",
2186 "obs_id": "two",
2187 "physical_filter": "Cam1-G",
2188 "day_obs": 20200101,
2189 "timespan": Timespan(t3, t4),
2190 },
2191 {
2192 "instrument": "Cam1",
2193 "id": 3,
2194 "group": "group3",
2195 "obs_id": "three",
2196 "physical_filter": "Cam1-G",
2197 "day_obs": 20200101,
2198 "timespan": Timespan(t4, t5),
2199 },
2200 )
2201 # Get references to some datasets.
2202 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2203 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2204 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2205 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2206 # Register the main calibration collection we'll be working with.
2207 collection = "Cam1/calibs/default"
2208 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2209 # Cannot associate into a calibration collection (no timespan).
2210 with self.assertRaises(CollectionTypeError):
2211 registry.associate(collection, [bias2a])
2212 # Certify 2a dataset with [t2, t4) validity.
2213 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2214 # Test that we can query for this dataset via the new collection, both
2215 # on its own and with a RUN collection.
2216 self.assertEqual(
2217 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2218 {bias2a},
2219 )
2220 self.assertEqual(
2221 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2222 {
2223 bias2a,
2224 bias2b,
2225 bias3b,
2226 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2227 },
2228 )
2229 self.assertEqual(
2230 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2231 {registry.expandDataId(instrument="Cam1", detector=2)},
2232 )
2233 self.assertEqual(
2234 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2235 {
2236 registry.expandDataId(instrument="Cam1", detector=2),
2237 registry.expandDataId(instrument="Cam1", detector=3),
2238 registry.expandDataId(instrument="Cam1", detector=4),
2239 },
2240 )
2241 self.assertEqual(
2242 set(
2243 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2244 "bias", findFirst=True, collections=[collection]
2245 )
2246 ),
2247 {
2248 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2249 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2250 },
2251 )
2252 self.assertEqual(
2253 set(
2254 registry.queryDataIds(
2255 ["exposure", "detector"], instrument="Cam1", detector=2
2256 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2257 ),
2258 {
2259 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2260 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2261 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2262 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2263 },
2264 )
2266 # We should not be able to certify 2b with anything overlapping that
2267 # window.
2268 with self.assertRaises(ConflictingDefinitionError):
2269 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2270 with self.assertRaises(ConflictingDefinitionError):
2271 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2272 with self.assertRaises(ConflictingDefinitionError):
2273 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2274 with self.assertRaises(ConflictingDefinitionError):
2275 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2276 with self.assertRaises(ConflictingDefinitionError):
2277 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2278 with self.assertRaises(ConflictingDefinitionError):
2279 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2280 with self.assertRaises(ConflictingDefinitionError):
2281 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2282 with self.assertRaises(ConflictingDefinitionError):
2283 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2284 # We should be able to certify 3a with a range overlapping that window,
2285 # because it's for a different detector.
2286 # We'll certify 3a over [t1, t3).
2287 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2288 # Now we'll certify 2b and 3b together over [t4, ∞).
2289 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2291 # Fetch all associations and check that they are what we expect.
2292 self.assertCountEqual(
2293 list(
2294 registry.queryDatasetAssociations(
2295 "bias",
2296 collections=[collection, "imported_g", "imported_r"],
2297 )
2298 ),
2299 [
2300 DatasetAssociation(
2301 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2302 collection="imported_g",
2303 timespan=None,
2304 ),
2305 DatasetAssociation(
2306 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2307 collection="imported_r",
2308 timespan=None,
2309 ),
2310 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2311 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2312 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2313 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2314 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2315 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2316 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2317 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2318 ],
2319 )
2321 class Ambiguous:
2322 """Tag class to denote lookups that should be ambiguous."""
2324 pass
2326 def _assertLookup(
2327 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2328 ) -> None:
2329 """Local function that asserts that a bias lookup returns the given
2330 expected result.
2331 """
2332 if expected is Ambiguous:
2333 with self.assertRaises((DatasetTypeError, LookupError)):
2334 registry.findDataset(
2335 "bias",
2336 collections=collection,
2337 instrument="Cam1",
2338 detector=detector,
2339 timespan=timespan,
2340 )
2341 else:
2342 self.assertEqual(
2343 expected,
2344 registry.findDataset(
2345 "bias",
2346 collections=collection,
2347 instrument="Cam1",
2348 detector=detector,
2349 timespan=timespan,
2350 ),
2351 )
2353 # Systematically test lookups against expected results.
2354 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2355 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2356 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2357 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2358 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2359 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2360 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2361 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2362 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2363 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2364 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2365 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2366 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2367 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2368 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2369 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2370 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2371 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2372 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2373 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2374 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2375 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2376 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2377 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2378 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2379 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2380 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2381 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2382 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2383 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2384 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2385 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2386 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2387 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2388 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2389 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2390 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2391 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2392 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2393 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2394 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2395 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2397 # Test lookups via temporal joins to exposures.
2398 self.assertEqual(
2399 set(
2400 registry.queryDataIds(
2401 ["exposure", "detector"], instrument="Cam1", detector=2
2402 ).findRelatedDatasets("bias", collections=[collection])
2403 ),
2404 {
2405 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2406 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2407 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2408 },
2409 )
2410 self.assertEqual(
2411 set(
2412 registry.queryDataIds(
2413 ["exposure", "detector"], instrument="Cam1", detector=3
2414 ).findRelatedDatasets("bias", collections=[collection])
2415 ),
2416 {
2417 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2418 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2419 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2420 },
2421 )
2422 self.assertEqual(
2423 set(
2424 registry.queryDataIds(
2425 ["exposure", "detector"], instrument="Cam1", detector=2
2426 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2427 ),
2428 {
2429 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2430 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2431 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2432 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2433 },
2434 )
2435 self.assertEqual(
2436 set(
2437 registry.queryDataIds(
2438 ["exposure", "detector"], instrument="Cam1", detector=3
2439 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2440 ),
2441 {
2442 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2443 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2444 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2445 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2446 },
2447 )
2449 # Decertify [t3, t5) for all data IDs, and run the test lookups again.
2450 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2451 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2452 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2453 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2454 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2455 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2456 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2457 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2458 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2459 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2460 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2461 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2462 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2463 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2464 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2465 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2466 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2467 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2468 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2469 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2470 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2471 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2472 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2473 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2474 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2475 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2476 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2477 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2478 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2479 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2480 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2481 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2482 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2483 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2484 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2485 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2486 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2487 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2488 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2489 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2490 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2491 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2492 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2493 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2494 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2496 # Decertify everything, this time with explicit data IDs, then check
2497 # that no lookups succeed.
2498 registry.decertify(
2499 collection,
2500 "bias",
2501 Timespan(None, None),
2502 dataIds=[
2503 dict(instrument="Cam1", detector=2),
2504 dict(instrument="Cam1", detector=3),
2505 ],
2506 )
2507 for detector in (2, 3):
2508 for timespan in allTimespans:
2509 _assertLookup(detector=detector, timespan=timespan, expected=None)
2510 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2511 # those.
2512 registry.certify(
2513 collection,
2514 [bias2a, bias3a],
2515 Timespan(None, None),
2516 )
2517 for timespan in allTimespans:
2518 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2519 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2520 # Decertify just bias2a (detector 2) over [t2, t4).
2521 # This should split a single certification row into two (and leave the
2522 # other existing row, for bias3a, alone).
2523 registry.decertify(
2524 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2525 )
2526 for timespan in allTimespans:
2527 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2528 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2529 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2530 if overlapsBefore and overlapsAfter:
2531 expected = Ambiguous
2532 elif overlapsBefore or overlapsAfter:
2533 expected = bias2a
2534 else:
2535 expected = None
2536 _assertLookup(detector=2, timespan=timespan, expected=expected)
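# Self-contained sketch (illustrative) of the half-open Timespan semantics
# behind the expectations above: [a, b) and [b, c) share only the excluded
# endpoint b, so they do not overlap.
_ta = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
_tb = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
_tc = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
assert not Timespan(_ta, _tb).overlaps(Timespan(_tb, _tc))
assert Timespan(_ta, _tc).overlaps(Timespan(_tb, _tc))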
2538 def testSkipCalibs(self):
2539 """Test how queries handle skipping of calibration collections."""
2540 registry = self.makeRegistry()
2541 self.loadData(registry, "base.yaml")
2542 self.loadData(registry, "datasets.yaml")
2544 coll_calib = "Cam1/calibs/default"
2545 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2547 # Add all biases to the calibration collection.
2548 # Without this, the logic that prunes dataset subqueries based on
2549 # datasetType-collection summary information will fire before the logic
2550 # we want to test below. This is a good thing (it avoids the dreaded
2551 # NotImplementedError a bit more often) everywhere but here.
2552 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2554 coll_list = [coll_calib, "imported_g", "imported_r"]
2555 chain = "Cam1/chain"
2556 registry.registerCollection(chain, type=CollectionType.CHAINED)
2557 registry.setCollectionChain(chain, coll_list)
2559 # explicit list will raise if findFirst=True or there are temporal
2560 # dimensions
2561 with self.assertRaises(NotImplementedError):
2562 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2563 with self.assertRaises(NotImplementedError):
2564 registry.queryDataIds(
2565 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2566 ).count()
2568 # chain will skip
2569 datasets = list(registry.queryDatasets("bias", collections=chain))
2570 self.assertGreater(len(datasets), 0)
2572 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2573 self.assertGreater(len(dataIds), 0)
2575 # glob will skip too
2576 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2577 self.assertGreater(len(datasets), 0)
2579 # regular expression will skip too
2580 pattern = re.compile(".*")
2581 datasets = list(registry.queryDatasets("bias", collections=pattern))
2582 self.assertGreater(len(datasets), 0)
2584 # ellipsis should work as usual
2585 datasets = list(registry.queryDatasets("bias", collections=...))
2586 self.assertGreater(len(datasets), 0)
2588 # few tests with findFirst
2589 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2590 self.assertGreater(len(datasets), 0)
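# Note (illustrative): a CALIBRATION collection associates each dataset
# with a validity timespan, so a find-first search through an explicit
# list of collections has no timespan to compare against and raises
# NotImplementedError, whereas chained collections and collection
# patterns simply skip the calibration member, as exercised above.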
2592 def testIngestTimeQuery(self):
2593 registry = self.makeRegistry()
2594 self.loadData(registry, "base.yaml")
2595 dt0 = datetime.datetime.now(datetime.UTC)
2596 self.loadData(registry, "datasets.yaml")
2597 dt1 = datetime.datetime.now(datetime.UTC)
2599 datasets = list(registry.queryDatasets(..., collections=...))
2600 len0 = len(datasets)
2601 self.assertGreater(len0, 0)
2603 where = "ingest_date > T'2000-01-01'"
2604 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2605 len1 = len(datasets)
2606 self.assertEqual(len0, len1)
2608 # no one will ever use this piece of software in 30 years
2609 where = "ingest_date > T'2050-01-01'"
2610 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2611 len2 = len(datasets)
2612 self.assertEqual(len2, 0)
2614 # Check more exact timing to make sure there is no 37-second offset
2615 # (after fixing DM-30124). SQLite time precision is 1 second, make
2616 # sure that we don't test with higher precision.
2617 tests = [
2618 # format: (timestamp, operator, expected_len)
2619 (dt0 - timedelta(seconds=1), ">", len0),
2620 (dt0 - timedelta(seconds=1), "<", 0),
2621 (dt1 + timedelta(seconds=1), "<", len0),
2622 (dt1 + timedelta(seconds=1), ">", 0),
2623 ]
2624 for dt, op, expect_len in tests:
2625 dt_str = dt.isoformat(sep=" ")
2627 where = f"ingest_date {op} T'{dt_str}'"
2628 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2629 self.assertEqual(len(datasets), expect_len)
2631 # same with bind using datetime or astropy Time
2632 where = f"ingest_date {op} ingest_time"
2633 datasets = list(
2634 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2635 )
2636 self.assertEqual(len(datasets), expect_len)
2638 dt_astropy = astropy.time.Time(dt, format="datetime")
2639 datasets = list(
2640 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2641 )
2642 self.assertEqual(len(datasets), expect_len)
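# Self-contained sketch (illustrative) of the timestamp literal format
# used in the `where` strings above: datetime.isoformat(sep=" ") produces
# exactly the string embedded in T'...'.
_dt = datetime.datetime(2024, 5, 2, 10, 24, tzinfo=datetime.UTC)
assert _dt.isoformat(sep=" ") == "2024-05-02 10:24:00+00:00"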
2644 def testTimespanQueries(self):
2645 """Test query expressions involving timespans."""
2646 registry = self.makeRegistry()
2647 self.loadData(registry, "hsc-rc2-subset.yaml")
2648 # All visits in the database; mapping from ID to timespan.
2649 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2650 # Just those IDs, sorted (which is also temporal sorting, because HSC
2651 # visit IDs are monotonically increasing).
2652 ids = sorted(visits.keys())
2653 self.assertGreater(len(ids), 20)
2654 # Pick some quasi-random indexes into `ids` to play with.
2655 i1 = int(len(ids) * 0.1)
2656 i2 = int(len(ids) * 0.3)
2657 i3 = int(len(ids) * 0.6)
2658 i4 = int(len(ids) * 0.8)
2659 # Extract some times from those: just before the beginning of i1 (which
2660 # should be after the end of the exposure before), exactly the
2661 # beginning of i2, just after the beginning of i3 (and before its end),
2662 # and the exact end of i4.
2663 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2664 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2665 t2 = visits[ids[i2]].begin
2666 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2667 self.assertLess(t3, visits[ids[i3]].end)
2668 t4 = visits[ids[i4]].end
2669 # Make sure those are actually in order.
2670 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2672 bind = {
2673 "t1": t1,
2674 "t2": t2,
2675 "t3": t3,
2676 "t4": t4,
2677 "ts23": Timespan(t2, t3),
2678 }
2680 def query(where):
2681 """Return results as a sorted, deduplicated list of visit IDs.
2683 Parameters
2684 ----------
2685 where : `str`
2686 The WHERE clause for the query.
2687 """
2688 return sorted(
2689 {
2690 dataId["visit"]
2691 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2692 }
2693 )
2695 # Try a bunch of timespan queries, mixing up the bounds themselves,
2696 # where they appear in the expression, and how we get the timespan into
2697 # the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
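
    # Illustrative sketch, not one of the original tests: the half-open
    # interval semantics the assertions above rely on, shown with Timespan
    # directly instead of a query expression.
    def _sketch_timespan_semantics(self):
        """Timespans are half-open: begin is inclusive, end is exclusive,
        so adjacent timespans do not overlap.
        """
        t1 = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        early = Timespan(t1, t2)
        late = Timespan(t2, t3)
        # [t1, t2) and [t2, t3) share only the excluded endpoint t2.
        assert not early.overlaps(late)
        # A containing timespan overlaps both.
        assert Timespan(t1, t3).overlaps(early) and Timespan(t1, t3).overlaps(late)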

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries."""
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary()
        expected1.dataset_types.add(registry.getDatasetType("bias"))
        expected1.add_data_ids(
            flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(
            calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
        )
        expected2 = expected1.copy()
        expected2.dataset_types.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling SqlRegistry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
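
    # Illustrative sketch, not one of the original tests: what a retrieved
    # CollectionSummary is for. It is a conservative superset of a
    # collection's contents, letting callers skip collections that cannot
    # match a query without searching them.
    def _sketch_collection_summary_use(self, registry):
        """Use a summary to decide whether a collection is worth searching."""
        summary = registry.getCollectionSummary("imported_g")
        # Dataset types that *may* have datasets in the collection; absence
        # here guarantees absence from the collection.
        if "bias" not in summary.dataset_types.names:
            return []
        return list(registry.queryDatasets("bias", collections=["imported_g"]))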

    def testBindInQueryDatasets(self):
        """Test that the bind parameter is correctly forwarded in
        queryDatasets recursion.
        """
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertEqual(
            set(registry.queryDatasets("flat", band="r", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
        )

    def testQueryIntRangeExpressions(self):
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )
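
    # Illustrative sketch, not one of the original tests: the
    # "start..stop[:stride]" range syntax accepted inside IN(), expanded the
    # way the expression parser does. Note the inclusive stop value.
    def _sketch_int_range_syntax(self, registry):
        """Compare each range expression against its expanded form."""
        examples = {
            "1..2": [1, 2],  # inclusive stop, unlike Python's range()
            "1..4:2": [1, 3],  # stride 2 starting at 1; stop 4 not reached
            "2..4:2": [2, 4],  # stride 2 starting at 2
        }
        for expression, expected in examples.items():
            data_ids = registry.queryDataIds(
                ["detector"], instrument="Cam1", where=f"detector IN ({expression})"
            )
            assert {data_id["detector"] for data_id in data_ids} == set(expected)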

    def testQueryResultSummaries(self):
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")
        # Default test dataset has two collections, each with both flats and
        # biases. Add a new collection with only biases.
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
        # First query yields two results, and involves no postprocessing.
        query1 = registry.queryDataIds(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = registry.queryDataIds(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case (and
        # exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = registry.queryDataIds(["detector", "physical_filter"])
        queries_and_snippets = [
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDatasets("nonexistent", collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                registry.queryDatasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=registry.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                registry.queryDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection.
                base_query.findDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                registry.queryDatasets("flat", collections=re.compile("potato.+")),
                ["potato"],
            ),
        ]
        with self.assertRaises(MissingDatasetTypeError):
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
        with self.assertRaises(MissingDatasetTypeError):
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # Wildcards on dataset types are not permitted in queryDataIds.
        with self.assertRaises(DatasetTypeExpressionError):
            registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be
        # identified by cheap follow-up queries, yielding helpful diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                registry.queryDataIds(["subfilter"]),
                ["no rows", "subfilter"],
            ),
            (
                # No records for one of the involved dimensions.
                registry.queryDimensionRecords("subfilter"),
                ["no rows", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # Want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # This query yields four overlaps in the database, but one is filtered
        # out in postprocessing. The count queries aren't accurate because
        # they don't account for duplication that happens due to an internal
        # join against commonSkyPix.
        query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=registry.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = registry.queryDataIds(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = query4.explain_no_results()
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in the
        # database. Explaining why it fails involves traversing the relation
        # tree and running a LIMIT 1 query at each level that has the
        # potential to remove rows.
        query6 = registry.queryDimensionRecords(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        messages = query6.explain_no_results()
        self.assertTrue(messages)
        self.assertTrue(any("no-purpose" in message for message in messages))

    def testQueryDataIdsExpressionError(self):
        """Test error checking of 'where' expressions in queryDataIds."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            registry.queryDataIds(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)

    def testQueryDataIdsOrderBy(self):
        """Test order_by and limit on result returned by queryDataIds()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
            return registry.queryDataIds(
                dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
            )

        Test = namedtuple(
            "testQueryDataIdsOrderByTest",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-visit.timespan.begin,visit.timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # Materializing an ordered query is not allowed.
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))

    def testQueryDataIdsGovernorExceptions(self):
        """Test exceptions raised by queryDataIds() for incorrect governors."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
            return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)

        Test = namedtuple(
            "testQueryDataIdExceptionsTest",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                self.assertEqual(query.count(discard=True), test.count)

            # and materialize
            if test.exception:
                with self.assertRaises(test.exception):
                    query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                    with query.materialize() as materialized:
                        materialized.count(discard=True)
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)
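
    # Illustrative sketch, not one of the original tests: materialize()
    # writes the query results into a temporary table, so several follow-up
    # operations can reuse them without re-running the full query; the
    # context manager drops the table on exit.
    def _sketch_materialize_use(self, registry):
        """Materialize a data ID query before multiple follow-ups."""
        query = registry.queryDataIds(["tract", "visit"], instrument="Cam1", skymap="SkyMap1")
        with query.materialize() as materialized:
            # Both calls below read from the temporary table.
            n = materialized.count(discard=True)
            rows = list(materialized)
        return n, rows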

    def testQueryDimensionRecordsOrderBy(self):
        """Test order_by and limit on result returned by
        queryDimensionRecords().
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(element, datasets=None, collections=None):
            return registry.queryDimensionRecords(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "testQueryDimensionRecordsOrderByTest",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = (
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-timespan.begin", (2, 1)),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            ids = tuple(rec.id for rec in query)
            self.assertEqual(ids, test.result)

        # errors in a name
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                ValueError,
                r"Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?",
            ):
                list(do_query("visit").order_by(order_by))

    def testQueryDimensionRecordsExceptions(self):
        """Test exceptions raised by queryDimensionRecords()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        result = registry.queryDimensionRecords("detector")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertEqual(result.count(), 4)

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = registry.queryDimensionRecords("detector", instrument="NotCam1")
            result.count()

        with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
            result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
            result.count()

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
            result.count()

        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            result = registry.queryDimensionRecords(
                "detector", where="instrument=instr", bind={"instr": "NotCam1"}
            )
            result.count()

    def testDatasetConstrainedDimensionRecordQueries(self):
        """Test that queryDimensionRecords works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Query for physical_filter dimension records, using a dataset type
        # whose dimensions include detector as well as physical_filter.
        records = registry.queryDimensionRecords(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))

    def testSkyPixDatasetQueries(self):
        """Test that we can build queries involving skypix dimensions as long
        as a dataset type that uses those dimensions is included.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dataset_type = DatasetType(
            "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
        )
        registry.registerDatasetType(dataset_type)
        run = "r"
        registry.registerRun(run)
        # First try queries where there are no datasets; the concern is
        # whether we can even build and execute these queries without
        # raising, even when "doomed" query shortcuts are in play.
        self.assertFalse(
            list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
        )
        self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
        # Now add a dataset and see that we can get it back.
        htm7 = registry.dimensions.skypix["htm"][7].pixelization
        data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
        (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
        self.assertEqual(
            set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
            {data_id},
        )
        self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
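
    # Illustrative sketch, not one of the original tests: how a skypix
    # pixelization maps between integer pixel IDs and sky regions, using the
    # same htm7 object as the test above. The envelope()/contains() calls
    # are assumed from the lsst.sphgeom Pixelization/RangeSet interfaces.
    def _sketch_skypix_pixelization(self, registry):
        """Round-trip a pixel ID through its region."""
        htm7 = registry.dimensions.skypix["htm"][7].pixelization
        # universe() is a RangeSet of all valid pixel indices; take the
        # first index of the first range, as the test above does.
        pixel_id = htm7.universe()[0][0]
        region = htm7.pixel(pixel_id)
        # envelope() returns the pixels possibly overlapping a region; it
        # must include the pixel the region came from.
        assert htm7.envelope(region).contains(pixel_id)
        return region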

    def testDatasetIdFactory(self):
        """Simple test for DatasetIdFactory, mostly to catch potential changes
        in its API.
        """
        registry = self.makeRegistry()
        factory = DatasetIdFactory()
        dataset_type = DatasetType(
            "datasetType",
            dimensions=["detector", "instrument"],
            universe=registry.dimensions,
            storageClass="int",
        )
        run = "run"
        data_id = DataCoordinate.standardize(
            instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
        )

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 4)

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 5)

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 5)
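
    # Illustrative sketch, not one of the original tests: why the UUID
    # versions checked above differ. UNIQUE IDs are random (version 4);
    # DATAID_TYPE and DATAID_TYPE_RUN IDs are deterministic hashes of their
    # inputs (version 5), so equal inputs reproduce equal IDs.
    def _sketch_deterministic_dataset_ids(self, factory, dataset_type, data_id, run):
        """Deterministic modes are reproducible; UNIQUE mode is not."""
        a = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        b = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        assert a == b  # same inputs hash to the same UUID5
        c = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
        d = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
        assert c != d  # a fresh random UUID4 on every call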

    def testExposureQueries(self):
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler tests
        is the hsc-rc2-subset.yaml export (which is unfortunately distinct
        from the lsst/rc2_subset GitHub repo), but that does not have
        'exposure' dimension records, as it was focused on providing
        nontrivial spatial overlaps between visit+detector and tract+patch.
        So in this test we need to translate queries that originally used the
        exposure dimension to use the (very similar) visit dimension instead.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        self.assertEqual(
            [
                record.id
                for record in registry.queryDimensionRecords("visit", instrument="HSC")
                .order_by("id")
                .limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in registry.queryDimensionRecords("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in registry.queryDataIds(["detector"], instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_long_query_names(self) -> None:
        """Test that queries involving very long names are handled correctly.

        This is especially important for PostgreSQL, which truncates symbols
        longer than 64 characters, but it's worth testing for all databases.
        """
        registry = self.makeRegistry()
        name = "abcd" * 17
        registry.registerDatasetType(
            DatasetType(
                name,
                dimensions=(),
                storageClass="Exposure",
                universe=registry.dimensions,
            )
        )
        # Need to search more than one collection actually containing a
        # matching dataset to avoid optimizations that sidestep bugs due to
        # truncation by making findFirst=True a no-op.
        run1 = "run1"
        registry.registerRun(run1)
        run2 = "run2"
        registry.registerRun(run2)
        (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
        registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
        self.assertEqual(
            set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
            {ref1},
        )

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in registry.queryDataIds(["patch"]).expanded()
        }
        skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    dataId={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system in
        # the dimensions doesn't generate redundant join terms in the query.
        full_data_ids = set(
            registry.queryDataIds(
                ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
            ).expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
            self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))

    def test_spatial_constraint_queries(self) -> None:
        """Test queries in which one spatial dimension in the constraint (data
        ID or ``where`` string) constrains a different spatial dimension in the
        query result columns.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in registry.queryDataIds(["patch"]).expanded()
        }
        observation_regions = {
            (data_id["visit"], data_id["detector"]): data_id.region
            for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
        }
        all_combos = {
            (patch_key, observation_key)
            for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
        }
        overlapping_combos = {
            (patch_key, observation_key)
            for patch_key, observation_key in all_combos
            if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
        }
        # Check a direct spatial join with no constraint first.
        self.assertEqual(
            {
                ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
                for data_id in registry.queryDataIds(["patch", "visit", "detector"])
            },
            overlapping_combos,
        )
        overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
        overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
        for patch_key, observation_key in overlapping_combos:
            overlaps_by_patch[patch_key].add(observation_key)
            overlaps_by_observation[observation_key].add(patch_key)
        # Find patches and observations that overlap at least one of the
        # other kind, but not all of them.
        nontrivial_patch = next(
            iter(
                patch_key
                for patch_key, observation_keys in overlaps_by_patch.items()
                if observation_keys and observation_keys != observation_regions.keys()
            )
        )
        nontrivial_observation = next(
            iter(
                observation_key
                for observation_key, patch_keys in overlaps_by_observation.items()
                if patch_keys and patch_keys != patch_regions.keys()
            )
        )
        # Use the nontrivial patches and observations as constraints on the
        # other dimensions in various ways, first via a 'where' expression.
        # It's better in general to use 'bind' instead of f-strings, but
        # these are all integers so there are no quoting concerns.
        self.assertEqual(
            {
                (data_id["visit"], data_id["detector"])
                for data_id in registry.queryDataIds(
                    ["visit", "detector"],
                    where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
                    skymap="hsc_rings_v1",
                )
            },
            overlaps_by_patch[nontrivial_patch],
        )
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
                    instrument="HSC",
                )
            },
            overlaps_by_observation[nontrivial_observation],
        )
        # and then via the dataId argument.
        self.assertEqual(
            {
                (data_id["visit"], data_id["detector"])
                for data_id in registry.queryDataIds(
                    ["visit", "detector"],
                    dataId={
                        "tract": nontrivial_patch[0],
                        "patch": nontrivial_patch[1],
                    },
                    skymap="hsc_rings_v1",
                )
            },
            overlaps_by_patch[nontrivial_patch],
        )
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    dataId={
                        "visit": nontrivial_observation[0],
                        "detector": nontrivial_observation[1],
                    },
                    instrument="HSC",
                )
            },
            overlaps_by_observation[nontrivial_observation],
        )

    def test_query_projection_drop_postprocessing(self) -> None:
        """Test that projections and deduplications on query objects can
        drop post-query region filtering to ensure the query remains in
        the SQL engine.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")

        def pop_transfer(tree: Relation) -> Relation:
            """If a relation tree terminates with a transfer to a new engine,
            return the relation prior to that transfer. If not, return the
            original relation.

            Parameters
            ----------
            tree : `Relation`
                The relation tree to modify.
            """
            match tree:
                case Transfer(target=target):
                    return target
                case _:
                    return tree

        # There's no public way to get a Query object yet, so we get one from
        # a DataCoordinateQueryResults private attribute. When a public API
        # is available this test should use it.
        query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
        # We expect this query to terminate in the iteration engine
        # originally, because region-filtering is necessary.
        self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
        # If we deduplicate, we usually have to do that downstream of the
        # filtering. That means the deduplication has to happen in the
        # iteration engine.
        self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
        # If we pass drop_postprocessing, we instead drop the region filtering
        # so the deduplication can happen in SQL (though there might still be
        # a transfer to iteration at the tail of the tree that we can ignore;
        # that's what the pop_transfer takes care of here).
        self.assertIsInstance(
            pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
            sql.Engine,
        )

    def test_query_find_datasets_drop_postprocessing(self) -> None:
        """Test that DataCoordinateQueryResults.findDatasets avoids commutator
        problems with the FindFirstDataset relation operation.
        """
        # Setup: load some visit, tract, and patch records, and insert two
        # datasets with dimensions {visit, patch}, with one in each of two
        # RUN collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")
        storage_class = StorageClass("Warpy")
        registry.storageClasses.registerStorageClass(storage_class)
        dataset_type = DatasetType(
            "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
        )
        registry.registerDatasetType(dataset_type)
        (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
        registry.registerRun("run1")
        registry.registerRun("run2")
        (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
        (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
        # Query for the dataset using queryDataIds(...).findDatasets(...)
        # against only one of the two collections. This should work even
        # though the relation returned by queryDataIds ends with
        # iteration-engine region-filtering, because we can recognize before
        # running the query that there is only one collection to search and
        # hence the (default) findFirst=True is irrelevant, and joining in the
        # dataset query commutes past the iteration-engine postprocessing.
        query1 = registry.queryDataIds(
            {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
        )
        self.assertEqual(
            set(query1.findDatasets(dataset_type.name, collections=["run1"])),
            {ref1},
        )
        # Query for the dataset using queryDataIds(...).findDatasets(...)
        # against both collections. This can only work if the FindFirstDataset
        # operation can be commuted past the iteration-engine operations into
        # SQL.
        query2 = registry.queryDataIds(
            {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
        )
        self.assertEqual(
            set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
            {ref2},
        )

    def test_query_empty_collections(self) -> None:
        """Test the registry query methods with empty collections. The
        methods should return an empty result set (or `None` where
        applicable) and provide "doomed" diagnostics.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        # Tests for registry.findDataset()
        with self.assertRaises(NoDefaultCollectionError):
            registry.findDataset("bias", instrument="Cam1", detector=1)
        self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
        self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))

        # Tests for registry.queryDatasets()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDatasets("bias")
        self.assertTrue(list(registry.queryDatasets("bias", collections=...)))

        result = registry.queryDatasets("bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

        # Tests for registry.queryDataIds()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDataIds("detector", datasets="bias")
        self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))

        result = registry.queryDataIds("detector", datasets="bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

        # Tests for registry.queryDimensionRecords()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDimensionRecords("detector", datasets="bias")
        self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))

        result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

    def test_dataset_followup_spatial_joins(self) -> None:
        """Test queryDataIds(...).findRelatedDatasets(...) where a spatial
        join is involved.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")
        pvi_dataset_type = DatasetType(
            "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
        )
        registry.registerDatasetType(pvi_dataset_type)
        collection = "datasets"
        registry.registerRun(collection)
        (pvi1,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
        )
        (pvi2,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
        )
        (pvi3,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
        )
        self.assertEqual(
            set(
                registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
                .expanded()
                .findRelatedDatasets("pvi", [collection])
            ),
            {
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
            },
        )