# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import CollectionTypeError, MissingCollectionError, MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class.  If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need a default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
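
    # A minimal sketch (hypothetical subclass; the manager path shown is an
    # assumption, not something defined in this module) of how a subclass can
    # redirect the managers consumed by `makeRegistryConfig`:
    #
    #     class NameKeyCollectionManagerRegistryTests(RegistryTests):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
    #         )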

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
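
    # A minimal sketch, assuming a SQLite-backed subclass (all names below are
    # illustrative, not part of this module), of how the two abstract hooks
    # are typically implemented:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # e.g. an in-memory DB cannot be shared
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite:///:memory:"
    #             ...  # construct and return the Registry for this config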

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
174 """
175 self.assertCountEqual(list(results), expected)
176 self.assertEqual(results.count(), len(expected))
177 if expected:
178 self.assertTrue(results.any())
179 else:
180 self.assertFalse(results.any())
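
    # Typical call pattern in the tests below (illustrative; the arguments
    # are placeholders):
    #
    #     results = registry.queryDataIds(["detector"], instrument="Cam1")
    #     self.checkQueryResults(results, expected=[...])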

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry. This test checks
        that this now fails.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found in chain2 as
        # well, since run2 is the first child of chain2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but before
            # inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section and
                # is waiting for us to release it.  Start the other thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter.  It is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
1223 registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
1224 registry.insertDimensionData(
1225 "physical_filter",
1226 dict(instrument="DummyCam", name="dummy_r", band="r"),
1227 dict(instrument="DummyCam", name="dummy_i", band="i"),
1228 )
1229 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
1230 for tract in range(10):
1231 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
1232 registry.insertDimensionData(
1233 "patch",
1234 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
1235 )
1237 # dataset types
1238 run = "tésτ"
1239 registry.registerRun(run)
1240 storageClass = StorageClass("testDataset")
1241 registry.storageClasses.registerStorageClass(storageClass)
1242 calexpType = DatasetType(
1243 name="deepCoadd_calexp",
1244 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
1245 storageClass=storageClass,
1246 )
1247 registry.registerDatasetType(calexpType)
1248 mergeType = DatasetType(
1249 name="deepCoadd_mergeDet",
1250 dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
1251 storageClass=storageClass,
1252 )
1253 registry.registerDatasetType(mergeType)
1254 measType = DatasetType(
1255 name="deepCoadd_meas",
1256 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
1257 storageClass=storageClass,
1258 )
1259 registry.registerDatasetType(measType)
1261 dimensions = registry.dimensions.conform(
1262 calexpType.dimensions.required.names
1263 | mergeType.dimensions.required.names
1264 | measType.dimensions.required.names
1265 )
1267 # add pre-existing datasets
1268 for tract in (1, 3, 5):
1269 for patch in (2, 4, 6, 7):
1270 dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
1271 registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
1272 for aFilter in ("i", "r"):
1273 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
1274 registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
1276 # with empty expression
1277 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1313 def testSpatialJoin(self):
1314 """Test queries that involve spatial overlap joins."""
1315 registry = self.makeRegistry()
1316 self.loadData(registry, "hsc-rc2-subset.yaml")
1318 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1319 # the TopologicalFamily they belong to. We'll relate all elements in
1320 # each family to all of the elements in each other family.
1321 families = defaultdict(set)
1322 # Dictionary of {element.name: {dataId: region}}.
1323 regions = {}
1324 for element in registry.dimensions.database_elements:
1325 if element.spatial is not None:
1326 families[element.spatial.name].add(element)
1327 regions[element.name] = {
1328 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1329 }
1331 # If this check fails, it's not necessarily a problem - it may just be
1332 # a reasonable change to the default dimension definitions - but the
1333 # test below depends on there being more than one family to do anything
1334 # useful.
1335 self.assertEqual(len(families), 2)
1337 # Overlap DatabaseDimensionElements with each other.
1338 for family1, family2 in itertools.combinations(families, 2):
1339 for element1, element2 in itertools.product(families[family1], families[family2]):
1340 dimensions = element1.minimal_group | element2.minimal_group
1341 # Construct expected set of overlapping data IDs via a
1342 # brute-force comparison of the regions we've already fetched.
1343 expected = {
1344 DataCoordinate.standardize(
1345 {**dataId1.required, **dataId2.required}, dimensions=dimensions
1346 )
1347 for (dataId1, region1), (dataId2, region2) in itertools.product(
1348 regions[element1.name].items(), regions[element2.name].items()
1349 )
1350 if not region1.isDisjointFrom(region2)
1351 }
1352 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1353 queried = set(registry.queryDataIds(dimensions))
1354 self.assertEqual(expected, queried)
1356 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1357 commonSkyPix = registry.dimensions.commonSkyPix
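# Pixelization.envelope(region) yields index ranges for every pixel that
# may overlap the region - a conservative superset - which we assume
# matches how the registry computes its stored skypix overlap rows, so
# building the expected set the same way is a fair comparison.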
1358 for elementName, these_regions in regions.items():
1359 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
1360 expected = set()
1361 for dataId, region in these_regions.items():
1362 for begin, end in commonSkyPix.pixelization.envelope(region):
1363 expected.update(
1364 DataCoordinate.standardize(
1365 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
1366 )
1367 for index in range(begin, end)
1368 )
1369 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1370 queried = set(registry.queryDataIds(dimensions))
1371 self.assertEqual(expected, queried)
1373 def testAbstractQuery(self):
1374 """Test that we can run a query that just lists the known
1375 bands. This is tricky because band is
1376 backed by a query against physical_filter.
1377 """
1378 registry = self.makeRegistry()
1379 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1380 registry.insertDimensionData(
1381 "physical_filter",
1382 dict(instrument="DummyCam", name="dummy_i", band="i"),
1383 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1384 dict(instrument="DummyCam", name="dummy_r", band="r"),
1385 )
1386 rows = registry.queryDataIds(["band"]).toSet()
1387 self.assertCountEqual(
1388 rows,
1389 [
1390 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1391 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1392 ],
1393 )
1395 def testAttributeManager(self):
1396 """Test basic functionality of attribute manager."""
1397 # Number of attributes with schema versions in a fresh database:
1398 # 6 managers with 2 records per manager, plus config for dimensions.
1399 VERSION_COUNT = 6 * 2 + 1
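# That is 13 pre-existing rows; the assertions below count relative to
# this baseline as we add and remove our own attributes.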
1401 registry = self.makeRegistry()
1402 attributes = registry._managers.attributes
1404 # check what get() returns for non-existing key
1405 self.assertIsNone(attributes.get("attr"))
1406 self.assertEqual(attributes.get("attr", ""), "")
1407 self.assertEqual(attributes.get("attr", "Value"), "Value")
1408 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1410 # cannot store empty key or value
1411 with self.assertRaises(ValueError):
1412 attributes.set("", "value")
1413 with self.assertRaises(ValueError):
1414 attributes.set("attr", "")
1416 # set value of non-existing key
1417 attributes.set("attr", "value")
1418 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1419 self.assertEqual(attributes.get("attr"), "value")
1421 # update value of existing key
1422 with self.assertRaises(ButlerAttributeExistsError):
1423 attributes.set("attr", "value2")
1425 attributes.set("attr", "value2", force=True)
1426 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1427 self.assertEqual(attributes.get("attr"), "value2")
1429 # delete existing key
1430 self.assertTrue(attributes.delete("attr"))
1431 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1433 # delete non-existing key
1434 self.assertFalse(attributes.delete("non-attr"))
1436 # store a bunch of keys and get the list back
1437 data = [
1438 ("version.core", "1.2.3"),
1439 ("version.dimensions", "3.2.1"),
1440 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1441 ]
1442 for key, value in data:
1443 attributes.set(key, value)
1444 items = dict(attributes.items())
1445 for key, value in data:
1446 self.assertEqual(items[key], value)
1448 def testQueryDatasetsDeduplication(self):
1449 """Test that the findFirst option to queryDatasets selects datasets
1450 from collections in the order given.
1451 """
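# findFirst=True resolves duplicates by collection search order: a bias
# present in both runs is returned from whichever collection appears
# first in the list, which is why swapping "imported_g" and "imported_r"
# below changes which refs come back for detectors 2 and 3.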
1452 registry = self.makeRegistry()
1453 self.loadData(registry, "base.yaml")
1454 self.loadData(registry, "datasets.yaml")
1455 self.assertCountEqual(
1456 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1457 [
1458 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1459 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1460 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1461 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1462 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1463 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1464 ],
1465 )
1466 self.assertCountEqual(
1467 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1468 [
1469 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1470 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1471 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1472 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1473 ],
1474 )
1475 self.assertCountEqual(
1476 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1477 [
1478 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1479 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1480 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1481 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1482 ],
1483 )
1485 def testQueryResults(self):
1486 """Test querying for data IDs and then manipulating the QueryResults
1487 object returned to perform other queries.
1488 """
1489 registry = self.makeRegistry()
1490 self.loadData(registry, "base.yaml")
1491 self.loadData(registry, "datasets.yaml")
1492 bias = registry.getDatasetType("bias")
1493 flat = registry.getDatasetType("flat")
1494 # Obtain expected results from methods other than those we're testing
1495 # here. That includes:
1496 # - the dimensions of the data IDs we want to query:
1497 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1498 # - the dimensions of some other data IDs we'll extract from that:
1499 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1500 # - the data IDs we expect to obtain from the first queries:
1501 expectedDataIds = DataCoordinateSet(
1502 {
1503 DataCoordinate.standardize(
1504 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1505 )
1506 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1507 },
1508 dimensions=expected_dimensions,
1509 hasFull=False,
1510 hasRecords=False,
1511 )
1512 # - the flat datasets we expect to find from those data IDs, in just
1513 # one collection (so deduplication is irrelevant):
1514 expectedFlats = [
1515 registry.findDataset(
1516 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1517 ),
1518 registry.findDataset(
1519 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1520 ),
1521 registry.findDataset(
1522 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1523 ),
1524 ]
1525 # - the data IDs we expect to extract from that:
1526 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1527 # - the bias datasets we expect to find from those data IDs, after we
1528 # subset out the physical_filter dimension, first with duplicates:
1529 expectedAllBiases = [
1530 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1531 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1532 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1533 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1534 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1535 ]
1536 # - ...and without duplicates:
1537 expectedDeduplicatedBiases = [
1538 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1539 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1540 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1541 ]
1542 # Test against those expected results, using a "lazy" query for the
1543 # data IDs (which re-executes that query each time we use it to do
1544 # something new).
1545 dataIds = registry.queryDataIds(
1546 ["detector", "physical_filter"],
1547 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1548 instrument="Cam1",
1549 )
1550 self.assertEqual(dataIds.dimensions, expected_dimensions)
1551 self.assertEqual(dataIds.toSet(), expectedDataIds)
1552 self.assertCountEqual(
1553 list(
1554 dataIds.findDatasets(
1555 flat,
1556 collections=["imported_r"],
1557 )
1558 ),
1559 expectedFlats,
1560 )
1561 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1562 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1563 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1564 self.assertCountEqual(
1565 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1566 expectedAllBiases,
1567 )
1568 self.assertCountEqual(
1569 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1570 expectedDeduplicatedBiases,
1571 )
1573 # Searching for a dataset with dimensions we had projected away
1574 # restores those dimensions.
1575 self.assertCountEqual(
1576 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1577 expectedFlats,
1578 )
1580 # Use a named dataset type that does not exist and a dataset type
1581 # object that does not exist.
1582 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1584 # Test both string name and dataset type object.
1585 test_type: str | DatasetType
1586 for test_type, test_type_name in (
1587 (unknown_type, unknown_type.name),
1588 (unknown_type.name, unknown_type.name),
1589 ):
1590 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1591 list(
1592 subsetDataIds.findDatasets(
1593 test_type, collections=["imported_r", "imported_g"], findFirst=True
1594 )
1595 )
1597 # Materialize the bias dataset queries (only) by putting the results
1598 # into temporary tables, then repeat those tests.
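# (materialize() is expected to run the query once into a temporary
# table that lives for the duration of the context block, so the
# repeated assertions below read from that table rather than
# re-executing the original query.)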
1599 with subsetDataIds.findDatasets(
1600 bias, collections=["imported_r", "imported_g"], findFirst=False
1601 ).materialize() as biases:
1602 self.assertCountEqual(list(biases), expectedAllBiases)
1603 with subsetDataIds.findDatasets(
1604 bias, collections=["imported_r", "imported_g"], findFirst=True
1605 ).materialize() as biases:
1606 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1607 # Materialize the data ID subset query, but not the dataset queries.
1608 with subsetDataIds.materialize() as subsetDataIds:
1609 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1610 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1611 self.assertCountEqual(
1612 list(
1613 subsetDataIds.findDatasets(
1614 bias, collections=["imported_r", "imported_g"], findFirst=False
1615 )
1616 ),
1617 expectedAllBiases,
1618 )
1619 self.assertCountEqual(
1620 list(
1621 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1622 ),
1623 expectedDeduplicatedBiases,
1624 )
1625 # Materialize the dataset queries, too.
1626 with subsetDataIds.findDatasets(
1627 bias, collections=["imported_r", "imported_g"], findFirst=False
1628 ).materialize() as biases:
1629 self.assertCountEqual(list(biases), expectedAllBiases)
1630 with subsetDataIds.findDatasets(
1631 bias, collections=["imported_r", "imported_g"], findFirst=True
1632 ).materialize() as biases:
1633 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1634 # Materialize the original query, but none of the follow-up queries.
1635 with dataIds.materialize() as dataIds:
1636 self.assertEqual(dataIds.dimensions, expected_dimensions)
1637 self.assertEqual(dataIds.toSet(), expectedDataIds)
1638 self.assertCountEqual(
1639 list(
1640 dataIds.findDatasets(
1641 flat,
1642 collections=["imported_r"],
1643 )
1644 ),
1645 expectedFlats,
1646 )
1647 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1648 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1649 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1650 self.assertCountEqual(
1651 list(
1652 subsetDataIds.findDatasets(
1653 bias, collections=["imported_r", "imported_g"], findFirst=False
1654 )
1655 ),
1656 expectedAllBiases,
1657 )
1658 self.assertCountEqual(
1659 list(
1660 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1661 ),
1662 expectedDeduplicatedBiases,
1663 )
1664 # Materialize just the bias dataset queries.
1665 with subsetDataIds.findDatasets(
1666 bias, collections=["imported_r", "imported_g"], findFirst=False
1667 ).materialize() as biases:
1668 self.assertCountEqual(list(biases), expectedAllBiases)
1669 with subsetDataIds.findDatasets(
1670 bias, collections=["imported_r", "imported_g"], findFirst=True
1671 ).materialize() as biases:
1672 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1673 # Materialize the subset data ID query, but not the dataset
1674 # queries.
1675 with subsetDataIds.materialize() as subsetDataIds:
1676 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1677 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1678 self.assertCountEqual(
1679 list(
1680 subsetDataIds.findDatasets(
1681 bias, collections=["imported_r", "imported_g"], findFirst=False
1682 )
1683 ),
1684 expectedAllBiases,
1685 )
1686 self.assertCountEqual(
1687 list(
1688 subsetDataIds.findDatasets(
1689 bias, collections=["imported_r", "imported_g"], findFirst=True
1690 )
1691 ),
1692 expectedDeduplicatedBiases,
1693 )
1694 # Materialize the bias dataset queries, too, so now we're
1695 # materializing every single step.
1696 with subsetDataIds.findDatasets(
1697 bias, collections=["imported_r", "imported_g"], findFirst=False
1698 ).materialize() as biases:
1699 self.assertCountEqual(list(biases), expectedAllBiases)
1700 with subsetDataIds.findDatasets(
1701 bias, collections=["imported_r", "imported_g"], findFirst=True
1702 ).materialize() as biases:
1703 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1705 def testStorageClassPropagation(self):
1706 """Test that queries for datasets respect the storage class passed in
1707 as part of a full dataset type.
1708 """
1709 registry = self.makeRegistry()
1710 self.loadData(registry, "base.yaml")
1711 dataset_type_in_registry = DatasetType(
1712 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1713 )
1714 registry.registerDatasetType(dataset_type_in_registry)
1715 run = "run1"
1716 registry.registerRun(run)
1717 (inserted_ref,) = registry.insertDatasets(
1718 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1719 )
1720 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1721 query_dataset_type = DatasetType(
1722 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1723 )
1724 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
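# The two definitions share a name and dimensions but differ in storage
# class; each query below should hand back refs carrying the storage
# class we passed in rather than the registered one.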
1725 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1726 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1727 (query_datasets_ref,) = query_datasets_result
1728 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1729 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1730 query_dataset_type, collections=[run]
1731 )
1732 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1733 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1734 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1735 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1736 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1737 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1738 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1740 def testEmptyDimensionsQueries(self):
1741 """Test Query and QueryResults objects in the case where there are no
1742 dimensions.
1743 """
1744 # Set up test data: one dataset type, two runs, one dataset in each.
1745 registry = self.makeRegistry()
1746 self.loadData(registry, "base.yaml")
1747 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1748 registry.registerDatasetType(schema)
1749 dataId = DataCoordinate.make_empty(registry.dimensions)
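# make_empty produces the unique data ID with no dimensions at all;
# every dataset of this dimensionless dataset type shares it.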
1750 run1 = "run1"
1751 run2 = "run2"
1752 registry.registerRun(run1)
1753 registry.registerRun(run2)
1754 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1755 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1756 # Query directly for both of the datasets, and each one, one at a time.
1757 self.checkQueryResults(
1758 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1759 )
1760 self.checkQueryResults(
1761 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1762 [dataset1],
1763 )
1764 self.checkQueryResults(
1765 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1766 [dataset2],
1767 )
1768 # Query for data IDs with no dimensions.
1769 dataIds = registry.queryDataIds([])
1770 self.checkQueryResults(dataIds, [dataId])
1771 # Use queried data IDs to find the datasets.
1772 self.checkQueryResults(
1773 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1774 [dataset1, dataset2],
1775 )
1776 self.checkQueryResults(
1777 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1778 [dataset1],
1779 )
1780 self.checkQueryResults(
1781 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1782 [dataset2],
1783 )
1784 # Now materialize the data ID query results and repeat those tests.
1785 with dataIds.materialize() as dataIds:
1786 self.checkQueryResults(dataIds, [dataId])
1787 self.checkQueryResults(
1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1789 [dataset1],
1790 )
1791 self.checkQueryResults(
1792 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1793 [dataset2],
1794 )
1795 # Query for non-empty data IDs, then subset that to get the empty one.
1796 # Repeat the above tests starting from that.
1797 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1798 self.checkQueryResults(dataIds, [dataId])
1799 self.checkQueryResults(
1800 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1801 [dataset1, dataset2],
1802 )
1803 self.checkQueryResults(
1804 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1805 [dataset1],
1806 )
1807 self.checkQueryResults(
1808 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1809 [dataset2],
1810 )
1811 with dataIds.materialize() as dataIds:
1812 self.checkQueryResults(dataIds, [dataId])
1813 self.checkQueryResults(
1814 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1815 [dataset1, dataset2],
1816 )
1817 self.checkQueryResults(
1818 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1819 [dataset1],
1820 )
1821 self.checkQueryResults(
1822 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1823 [dataset2],
1824 )
1825 # Query for non-empty data IDs, then materialize, then subset to get
1826 # the empty one. Repeat again.
1827 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1828 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1829 self.checkQueryResults(dataIds, [dataId])
1830 self.checkQueryResults(
1831 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1832 [dataset1, dataset2],
1833 )
1834 self.checkQueryResults(
1835 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1836 [dataset1],
1837 )
1838 self.checkQueryResults(
1839 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1840 [dataset2],
1841 )
1842 with dataIds.materialize() as dataIds:
1843 self.checkQueryResults(dataIds, [dataId])
1844 self.checkQueryResults(
1845 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1846 [dataset1, dataset2],
1847 )
1848 self.checkQueryResults(
1849 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1850 [dataset1],
1851 )
1852 self.checkQueryResults(
1853 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1854 [dataset2],
1855 )
1856 # Repeat the materialization tests with a dimension element that isn't
1857 # cached, so there's no way we can know when building the query whether
1858 # there are any rows or not (there aren't).
1859 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1860 with dataIds.materialize() as dataIds:
1861 self.checkQueryResults(dataIds, [])
1862 self.checkQueryResults(
1863 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1864 )
1865 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1866 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1867 # Query for non-empty data IDs with a constraint on an empty-data-ID
1868 # dataset that exists.
1869 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1870 self.checkQueryResults(
1871 dataIds.subset(unique=True),
1872 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1873 )
1874 # Again query for non-empty data IDs with a constraint on empty-data-ID
1875 # datasets, but when the datasets don't exist. We delete the existing
1876 # dataset and query just that collection rather than creating a new
1877 # empty collection because this is a bit less likely to be shortcut out
1878 # by our build-time logic (via the collection summaries), and such a
1879 # shortcut would make this test a bit more trivial than we'd like.
1880 registry.removeDatasets([dataset2])
1881 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1882 self.checkQueryResults(dataIds, [])
1884 def testDimensionDataModifications(self):
1885 """Test that modifying dimension records via:
1886 syncDimensionData(..., update=True) and
1887 insertDimensionData(..., replace=True) works as expected, even in the
1888 presence of datasets using those dimensions and spatial overlap
1889 relationships.
1890 """
1892 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1893 """Unpack a sphgeom.RangeSet into the integers it contains."""
1894 for begin, end in ranges:
1895 yield from range(begin, end)
1897 def _range_set_hull(
1898 ranges: lsst.sphgeom.RangeSet,
1899 pixelization: lsst.sphgeom.HtmPixelization,
1900 ) -> lsst.sphgeom.ConvexPolygon:
1901 """Create a ConvexPolygon hull of the region defined by a set of
1902 HTM pixelization index ranges.
1903 """
1904 points = []
1905 for index in _unpack_range_set(ranges):
1906 points.extend(pixelization.triangle(index).getVertices())
1907 return lsst.sphgeom.ConvexPolygon(points)
1909 # Use HTM to set up an initial parent region (one arbitrary trixel)
1910 # and four child regions (the trixels within the parent at the next
1911 # level). We'll use the parent as a tract/visit region and the children
1912 # as its patch/visit_detector regions.
1913 registry = self.makeRegistry()
1914 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1915 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1916 index = 12288
1917 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
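# HTM subdivision maps trixel i at one level to children [4*i, 4*i + 4)
# at the next, so scaling the single-trixel RangeSet by 4 selects
# exactly the four children; for index 12288 that is 49152-49155.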
1918 assert htm6.universe().contains(child_ranges_small)
1919 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1920 parent_region_small = lsst.sphgeom.ConvexPolygon(
1921 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1922 )
1923 assert all(parent_region_small.contains(c) for c in child_regions_small)
1924 # Make a larger version of each child region, defined to be the set of
1925 # htm6 trixels that overlap the original's bounding circle. Make a new
1926 # parent that's the convex hull of the new children.
1927 child_regions_large = [
1928 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1929 ]
1930 assert all(
1931 large.contains(small)
1932 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1933 )
1934 parent_region_large = lsst.sphgeom.ConvexPolygon(
1935 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1936 )
1937 assert all(parent_region_large.contains(c) for c in child_regions_large)
1938 assert parent_region_large.contains(parent_region_small)
1939 assert not parent_region_small.contains(parent_region_large)
1940 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1941 # Find some commonSkyPix indices that overlap the large regions but do
1942 # not overlap the small regions. We use commonSkyPix here to make sure the
1943 # real tests later involve what's in the database, not just post-query
1944 # filtering of regions.
1945 child_difference_indices = []
1946 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1947 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1948 assert difference, "if this is empty, we can't test anything useful with these regions"
1949 assert all(
1950 not commonSkyPix.triangle(d).isDisjointFrom(large)
1951 and commonSkyPix.triangle(d).isDisjointFrom(small)
1952 for d in difference
1953 )
1954 child_difference_indices.append(difference)
1955 parent_difference_indices = list(
1956 _unpack_range_set(
1957 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1958 )
1959 )
1960 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1961 assert all(
1962 (
1963 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1964 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1965 )
1966 for d in parent_difference_indices
1967 )
1968 # Now that we've finally got those regions, we'll insert the large ones
1969 # as tract/patch dimension records.
1970 skymap_name = "testing_v1"
1971 registry.insertDimensionData(
1972 "skymap",
1973 {
1974 "name": skymap_name,
1975 "hash": bytes([42]),
1976 "tract_max": 1,
1977 "patch_nx_max": 2,
1978 "patch_ny_max": 2,
1979 },
1980 )
1981 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1982 registry.insertDimensionData(
1983 "patch",
1984 *[
1985 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1986 for n, c in enumerate(child_regions_large)
1987 ],
1988 )
1989 # Add a dataset that uses these dimensions to make sure that modifying
1990 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1991 # implement insert with replace=True as delete-then-insert).
1992 dataset_type = DatasetType(
1993 "coadd",
1994 dimensions=["tract", "patch"],
1995 universe=registry.dimensions,
1996 storageClass="Exposure",
1997 )
1998 registry.registerDatasetType(dataset_type)
1999 registry.registerCollection("the_run", CollectionType.RUN)
2000 registry.insertDatasets(
2001 dataset_type,
2002 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
2003 run="the_run",
2004 )
2005 # Query for tracts and patches that overlap some "difference"
2006 # commonSkyPix pixels; there should be overlaps, because the database has
2007 # the "large" suite of regions.
2008 self.assertEqual(
2009 {0},
2010 {
2011 data_id["tract"]
2012 for data_id in registry.queryDataIds(
2013 ["tract"],
2014 skymap=skymap_name,
2015 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2016 )
2017 },
2018 )
2019 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2020 self.assertIn(
2021 patch_id,
2022 {
2023 data_id["patch"]
2024 for data_id in registry.queryDataIds(
2025 ["patch"],
2026 skymap=skymap_name,
2027 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2028 )
2029 },
2030 )
2031 # Use sync to update the tract region and insert to update the regions
2032 # of the patches, to the "small" suite.
2033 updated = registry.syncDimensionData(
2034 "tract",
2035 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
2036 update=True,
2037 )
2038 self.assertEqual(updated, {"region": parent_region_large})
2039 registry.insertDimensionData(
2040 "patch",
2041 *[
2042 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2043 for n, c in enumerate(child_regions_small)
2044 ],
2045 replace=True,
2046 )
2047 # Query again; there now should be no such overlaps, because the
2048 # database has the "small" suite of regions.
2049 self.assertFalse(
2050 set(
2051 registry.queryDataIds(
2052 ["tract"],
2053 skymap=skymap_name,
2054 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2055 )
2056 )
2057 )
2058 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2059 self.assertNotIn(
2060 patch_id,
2061 {
2062 data_id["patch"]
2063 for data_id in registry.queryDataIds(
2064 ["patch"],
2065 skymap=skymap_name,
2066 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2067 )
2068 },
2069 )
2070 # Update back to the large regions and query one more time.
2071 updated = registry.syncDimensionData(
2072 "tract",
2073 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2074 update=True,
2075 )
2076 self.assertEqual(updated, {"region": parent_region_small})
2077 registry.insertDimensionData(
2078 "patch",
2079 *[
2080 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2081 for n, c in enumerate(child_regions_large)
2082 ],
2083 replace=True,
2084 )
2085 self.assertEqual(
2086 {0},
2087 {
2088 data_id["tract"]
2089 for data_id in registry.queryDataIds(
2090 ["tract"],
2091 skymap=skymap_name,
2092 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2093 )
2094 },
2095 )
2096 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2097 self.assertIn(
2098 patch_id,
2099 {
2100 data_id["patch"]
2101 for data_id in registry.queryDataIds(
2102 ["patch"],
2103 skymap=skymap_name,
2104 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2105 )
2106 },
2107 )
2109 def testCalibrationCollections(self):
2110 """Test operations on `~CollectionType.CALIBRATION` collections,
2111 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2112 `SqlRegistry.findDataset`, and
2113 `DataCoordinateQueryResults.findRelatedDatasets`.
2114 """
2115 # Setup - make a Registry, fill it with some datasets in
2116 # non-calibration collections.
2117 registry = self.makeRegistry()
2118 self.loadData(registry, "base.yaml")
2119 self.loadData(registry, "datasets.yaml")
2120 # Set up some timestamps.
2121 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2122 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2123 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2124 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2125 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2126 allTimespans = [
2127 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2128 ]
2129 # Insert some exposure records with timespans between each sequential
2130 # pair of those.
2131 registry.insertDimensionData(
2132 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
2133 )
2134 registry.insertDimensionData(
2135 "group",
2136 {"instrument": "Cam1", "name": "group0"},
2137 {"instrument": "Cam1", "name": "group1"},
2138 {"instrument": "Cam1", "name": "group2"},
2139 {"instrument": "Cam1", "name": "group3"},
2140 )
2141 registry.insertDimensionData(
2142 "exposure",
2143 {
2144 "instrument": "Cam1",
2145 "id": 0,
2146 "group": "group0",
2147 "obs_id": "zero",
2148 "physical_filter": "Cam1-G",
2149 "day_obs": 20200101,
2150 "timespan": Timespan(t1, t2),
2151 },
2152 {
2153 "instrument": "Cam1",
2154 "id": 1,
2155 "group": "group1",
2156 "obs_id": "one",
2157 "physical_filter": "Cam1-G",
2158 "day_obs": 20200101,
2159 "timespan": Timespan(t2, t3),
2160 },
2161 {
2162 "instrument": "Cam1",
2163 "id": 2,
2164 "group": "group2",
2165 "obs_id": "two",
2166 "physical_filter": "Cam1-G",
2167 "day_obs": 20200101,
2168 "timespan": Timespan(t3, t4),
2169 },
2170 {
2171 "instrument": "Cam1",
2172 "id": 3,
2173 "group": "group3",
2174 "obs_id": "three",
2175 "physical_filter": "Cam1-G",
2176 "day_obs": 20200101,
2177 "timespan": Timespan(t4, t5),
2178 },
2179 )
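# The four exposures tile [t1, t5) contiguously:
# exposure 0 = [t1, t2), exposure 1 = [t2, t3),
# exposure 2 = [t3, t4), exposure 3 = [t4, t5).
# Each certification window below therefore maps onto a clean subset of
# exposures for the temporal-join tests.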
2180 # Get references to some datasets.
2181 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2182 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2183 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2184 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2185 # Register the main calibration collection we'll be working with.
2186 collection = "Cam1/calibs/default"
2187 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2188 # Cannot associate into a calibration collection (no timespan).
2189 with self.assertRaises(CollectionTypeError):
2190 registry.associate(collection, [bias2a])
2191 # Certify 2a dataset with [t2, t4) validity.
2192 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2193 # Test that we can query for this dataset via the new collection, both
2194 # on its own and with a RUN collection.
2195 self.assertEqual(
2196 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2197 {bias2a},
2198 )
2199 self.assertEqual(
2200 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2201 {
2202 bias2a,
2203 bias2b,
2204 bias3b,
2205 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2206 },
2207 )
2208 self.assertEqual(
2209 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2210 {registry.expandDataId(instrument="Cam1", detector=2)},
2211 )
2212 self.assertEqual(
2213 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2214 {
2215 registry.expandDataId(instrument="Cam1", detector=2),
2216 registry.expandDataId(instrument="Cam1", detector=3),
2217 registry.expandDataId(instrument="Cam1", detector=4),
2218 },
2219 )
2220 self.assertEqual(
2221 set(
2222 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2223 "bias", findFirst=True, collections=[collection]
2224 )
2225 ),
2226 {
2227 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2228 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2229 },
2230 )
2231 self.assertEqual(
2232 set(
2233 registry.queryDataIds(
2234 ["exposure", "detector"], instrument="Cam1", detector=2
2235 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2236 ),
2237 {
2238 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2239 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2240 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2241 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2242 },
2243 )
2245 # We should not be able to certify 2b with anything overlapping that
2246 # window.
2247 with self.assertRaises(ConflictingDefinitionError):
2248 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2249 with self.assertRaises(ConflictingDefinitionError):
2250 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2251 with self.assertRaises(ConflictingDefinitionError):
2252 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2253 with self.assertRaises(ConflictingDefinitionError):
2254 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2255 with self.assertRaises(ConflictingDefinitionError):
2256 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2257 with self.assertRaises(ConflictingDefinitionError):
2258 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2259 with self.assertRaises(ConflictingDefinitionError):
2260 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2261 with self.assertRaises(ConflictingDefinitionError):
2262 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2263 # We should be able to certify 3a with a range overlapping that window,
2264 # because it's for a different detector.
2265 # We'll certify 3a over [t1, t3).
2266 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2267 # Now we'll certify 2b and 3b together over [t4, ∞).
2268 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2270 # Fetch all associations and check that they are what we expect.
2271 self.assertCountEqual(
2272 list(
2273 registry.queryDatasetAssociations(
2274 "bias",
2275 collections=[collection, "imported_g", "imported_r"],
2276 )
2277 ),
2278 [
2279 DatasetAssociation(
2280 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2281 collection="imported_g",
2282 timespan=None,
2283 ),
2284 DatasetAssociation(
2285 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2286 collection="imported_r",
2287 timespan=None,
2288 ),
2289 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2290 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2291 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2292 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2293 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2294 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2295 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2296 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2297 ],
2298 )
2300 class Ambiguous:
2301 """Tag class to denote lookups that should be ambiguous."""
2303 pass
2305 def _assertLookup(
2306 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2307 ) -> None:
2308 """Local function that asserts that a bias lookup returns the given
2309 expected result.
2310 """
2311 if expected is Ambiguous:
2312 with self.assertRaises((DatasetTypeError, LookupError)):
2313 registry.findDataset(
2314 "bias",
2315 collections=collection,
2316 instrument="Cam1",
2317 detector=detector,
2318 timespan=timespan,
2319 )
2320 else:
2321 self.assertEqual(
2322 expected,
2323 registry.findDataset(
2324 "bias",
2325 collections=collection,
2326 instrument="Cam1",
2327 detector=detector,
2328 timespan=timespan,
2329 ),
2330 )
2332 # Systematically test lookups against expected results.
2333 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2334 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2335 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2336 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2337 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2338 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2339 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2340 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2341 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2342 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2343 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2344 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2345 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2346 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2347 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2348 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2349 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2350 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2351 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2352 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2353 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2354 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2355 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2356 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2357 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2358 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2359 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2360 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2361 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2362 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2363 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2364 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2365 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2366 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2367 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2368 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2369 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2370 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2371 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2372 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2373 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2374 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2376 # Test lookups via temporal joins to exposures.
2377 self.assertEqual(
2378 set(
2379 registry.queryDataIds(
2380 ["exposure", "detector"], instrument="Cam1", detector=2
2381 ).findRelatedDatasets("bias", collections=[collection])
2382 ),
2383 {
2384 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2385 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2386 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2387 },
2388 )
2389 self.assertEqual(
2390 set(
2391 registry.queryDataIds(
2392 ["exposure", "detector"], instrument="Cam1", detector=3
2393 ).findRelatedDatasets("bias", collections=[collection])
2394 ),
2395 {
2396 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2397 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2398 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2399 },
2400 )
2401 self.assertEqual(
2402 set(
2403 registry.queryDataIds(
2404 ["exposure", "detector"], instrument="Cam1", detector=2
2405 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2406 ),
2407 {
2408 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2409 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2410 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2411 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2412 },
2413 )
2414 self.assertEqual(
2415 set(
2416 registry.queryDataIds(
2417 ["exposure", "detector"], instrument="Cam1", detector=3
2418 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2419 ),
2420 {
2421 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2422 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2423 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2424 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2425 },
2426 )
2428 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2429 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2430 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2431 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
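# Resulting validity ranges, per the comment above:
# bias2a = [t2, t3), bias3a = [t1, t3) (unchanged),
# bias2b = [t5, ∞), bias3b = [t5, ∞).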
2432 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2433 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2434 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2435 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2436 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2437 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2438 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2439 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2440 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2441 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2442 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2443 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2444 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2445 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2446 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2447 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2448 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2449 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2450 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2451 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2452 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2453 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2454 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2455 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2456 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2457 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2458 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2459 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2460 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2461 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2462 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2463 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2464 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2465 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2466 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2467 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2468 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2469 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2470 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2471 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2472 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2473 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2475 # Decertify everything, this time with explicit data IDs, then check
2476 # that no lookups succeed.
2477 registry.decertify(
2478 collection,
2479 "bias",
2480 Timespan(None, None),
2481 dataIds=[
2482 dict(instrument="Cam1", detector=2),
2483 dict(instrument="Cam1", detector=3),
2484 ],
2485 )
2486 for detector in (2, 3):
2487 for timespan in allTimespans:
2488 _assertLookup(detector=detector, timespan=timespan, expected=None)
2489 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2490 # those.
2491 registry.certify(
2492 collection,
2493 [bias2a, bias3a],
2494 Timespan(None, None),
2495 )
2496 for timespan in allTimespans:
2497 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2498 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2499 # Decertify just bias2 over [t2, t4).
2500 # This should split a single certification row into two (and leave the
2501 # other existing row, for bias3a, alone).
2502 registry.decertify(
2503 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2504 )
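# bias2a's single (-∞, ∞) certification row should now be two rows,
# (-∞, t2) and [t4, ∞), which is what the overlapsBefore/overlapsAfter
# logic below reconstructs lookup-by-lookup.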
2505 for timespan in allTimespans:
2506 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2507 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2508 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2509 if overlapsBefore and overlapsAfter:
2510 expected = Ambiguous
2511 elif overlapsBefore or overlapsAfter:
2512 expected = bias2a
2513 else:
2514 expected = None
2515 _assertLookup(detector=2, timespan=timespan, expected=expected)
2517 def testSkipCalibs(self):
2518 """Test how queries handle skipping of calibration collections."""
2519 registry = self.makeRegistry()
2520 self.loadData(registry, "base.yaml")
2521 self.loadData(registry, "datasets.yaml")
2523 coll_calib = "Cam1/calibs/default"
2524 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2526 # Add all biases to the calibration collection.
2527 # Without this, the logic that prunes dataset subqueries based on
2528 # datasetType-collection summary information will fire before the logic
2529 # we want to test below. This is a good thing (it avoids the dreaded
2530 # NotImplementedError a bit more often) everywhere but here.
2531 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2533 coll_list = [coll_calib, "imported_g", "imported_r"]
2534 chain = "Cam1/chain"
2535 registry.registerCollection(chain, type=CollectionType.CHAINED)
2536 registry.setCollectionChain(chain, coll_list)
2538 # explicit list will raise if findFirst=True or there are temporal
2539 # dimensions
2540 with self.assertRaises(NotImplementedError):
2541 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2542 with self.assertRaises(NotImplementedError):
2543 registry.queryDataIds(
2544 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2545 ).count()
2547 # chain will skip
2548 datasets = list(registry.queryDatasets("bias", collections=chain))
2549 self.assertGreater(len(datasets), 0)
2551 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2552 self.assertGreater(len(dataIds), 0)
2554 # glob will skip too
2555 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2556 self.assertGreater(len(datasets), 0)
2558 # regular expression will skip too
2559 pattern = re.compile(".*")
2560 datasets = list(registry.queryDatasets("bias", collections=pattern))
2561 self.assertGreater(len(datasets), 0)
2563 # ellipsis should work as usual
2564 datasets = list(registry.queryDatasets("bias", collections=...))
2565 self.assertGreater(len(datasets), 0)
2567 # a few tests with findFirst
2568 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2569 self.assertGreater(len(datasets), 0)
2571 def testIngestTimeQuery(self):
2572 registry = self.makeRegistry()
2573 self.loadData(registry, "base.yaml")
2574 dt0 = datetime.datetime.now(datetime.UTC)
2575 self.loadData(registry, "datasets.yaml")
2576 dt1 = datetime.datetime.now(datetime.UTC)
2578 datasets = list(registry.queryDatasets(..., collections=...))
2579 len0 = len(datasets)
2580 self.assertGreater(len0, 0)
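# Time literals in the query expression language are written as
# T'<ISO string>'. Everything was ingested well after 2000, so this
# filter should keep every dataset.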
2582 where = "ingest_date > T'2000-01-01'"
2583 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2584 len1 = len(datasets)
2585 self.assertEqual(len0, len1)
2587 # no one will ever use this piece of software in 30 years
2588 where = "ingest_date > T'2050-01-01'"
2589 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2590 len2 = len(datasets)
2591 self.assertEqual(len2, 0)
2593 # Check more exact timing to make sure there is no 37-second offset
2594 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2595 # sure that we don't test with higher precision.
2596 tests = [
2597 # format: (timestamp, operator, expected_len)
2598 (dt0 - timedelta(seconds=1), ">", len0),
2599 (dt0 - timedelta(seconds=1), "<", 0),
2600 (dt1 + timedelta(seconds=1), "<", len0),
2601 (dt1 + timedelta(seconds=1), ">", 0),
2602 ]
2603 for dt, op, expect_len in tests:
2604 dt_str = dt.isoformat(sep=" ")
2606 where = f"ingest_date {op} T'{dt_str}'"
2607 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2608 self.assertEqual(len(datasets), expect_len)
2610 # same with bind using datetime or astropy Time
2611 where = f"ingest_date {op} ingest_time"
2612 datasets = list(
2613 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2614 )
2615 self.assertEqual(len(datasets), expect_len)
2617 dt_astropy = astropy.time.Time(dt, format="datetime")
2618 datasets = list(
2619 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2620 )
2621 self.assertEqual(len(datasets), expect_len)
2623 def testTimespanQueries(self):
2624 """Test query expressions involving timespans."""
2625 registry = self.makeRegistry()
2626 self.loadData(registry, "hsc-rc2-subset.yaml")
2628 # All visits in the database; mapping from ID to timespan.
2628 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2629 # Just those IDs, sorted (which is also temporal sorting, because HSC
2630 # visit IDs are monotonically increasing).
2631 ids = sorted(visits.keys())
2632 self.assertGreater(len(ids), 20)
2633 # Pick some quasi-random indexes into `ids` to play with.
2634 i1 = int(len(ids) * 0.1)
2635 i2 = int(len(ids) * 0.3)
2636 i3 = int(len(ids) * 0.6)
2637 i4 = int(len(ids) * 0.8)
2638 # Extract some times from those: just before the beginning of i1 (which
2639 # should be after the end of the visit before it), exactly the
2640 # beginning of i2, just after the beginning of i3 (and before its end),
2641 # and the exact end of i4.
2642 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2643 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2644 t2 = visits[ids[i2]].begin
2645 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2646 self.assertLess(t3, visits[ids[i3]].end)
2647 t4 = visits[ids[i4]].end
2648 # Make sure those are actually in order.
2649 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2651 bind = {
2652 "t1": t1,
2653 "t2": t2,
2654 "t3": t3,
2655 "t4": t4,
2656 "ts23": Timespan(t2, t3),
2657 }
2659 def query(where):
2660 """Return results as a sorted, deduplicated list of visit IDs.
2662 Parameters
2663 ----------
2664 where : `str`
2665 The WHERE clause for the query.
2666 """
2667 return sorted(
2668 {
2669 dataId["visit"]
2670 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2671 }
2672 )
2674 # Try a bunch of timespan queries, mixing up the bounds themselves,
2675 # where they appear in the expression, and how we get the timespan into
2676 # the expression.
2678 # t1 is before the start of i1, so this should not include i1.
2679 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2680 # t2 is exactly at the start of i2, but ends are exclusive, so these
2681 # should not include i2.
2682 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2683 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2684 # t3 is in the middle of i3, so this should include i3.
2685 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2686 # This one should not include i3, by the same reasoning.
2687 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2688 # t4 is exactly at the end of i4, so this should include i4.
2689 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2690 # i4's upper bound of t4 is exclusive, so this should not include i4.
2691 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2693 # Now some timespan vs. time scalar queries.
2694 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2695 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2696 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2697 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2698 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2699 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2701 # Empty timespans should not overlap anything.
2702 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
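def _sketch_timespan_bind(self, registry, t_start, t_stop):
    """A minimal sketch (not exercised by the suite) of binding timespan
    operands, assuming ``t_start`` and ``t_stop`` are `astropy.time.Time`
    values and ``registry`` holds the HSC visits loaded above.
    """
    bind = {"ts": Timespan(t_start, t_stop), "t": t_start}
    # A bound Timespan can be used directly as an OVERLAPS operand...
    overlapping = registry.queryDataIds(
        "visit", instrument="HSC", bind=bind, where="visit.timespan OVERLAPS ts"
    )
    # ...and a bound scalar time can be compared against a timespan;
    # upper bounds are exclusive, as the assertions above show.
    earlier = registry.queryDataIds(
        "visit", instrument="HSC", bind=bind, where="visit.timespan < t"
    )
    return set(overlapping), set(earlier)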
2704 def testCollectionSummaries(self):
2705 """Test recording and retrieval of collection summaries."""
2706 self.maxDiff = None
2707 registry = self.makeRegistry()
2708 # Importing datasets from yaml should go through the code path where
2709 # we update collection summaries as we insert datasets.
2710 self.loadData(registry, "base.yaml")
2711 self.loadData(registry, "datasets.yaml")
2712 flat = registry.getDatasetType("flat")
2713 expected1 = CollectionSummary()
2714 expected1.dataset_types.add(registry.getDatasetType("bias"))
2715 expected1.add_data_ids(
2716 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2717 )
2718 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2719 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2720 # Create a chained collection with both of the imported runs; the
2721 # summary should be the same, because it's a union with itself.
2722 chain = "chain"
2723 registry.registerCollection(chain, CollectionType.CHAINED)
2724 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2725 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2726 # Associate flats only into a tagged collection and a calibration
2727 # collection to check summaries of those.
2728 tag = "tag"
2729 registry.registerCollection(tag, CollectionType.TAGGED)
2730 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2731 calibs = "calibs"
2732 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2733 registry.certify(
2734 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2735 )
2736 expected2 = expected1.copy()
2737 expected2.dataset_types.discard("bias")
2738 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2739 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2740 # Explicitly calling SqlRegistry.refresh() should load those same
2741 # summaries, via a totally different code path.
2742 registry.refresh()
2743 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2744 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2745 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2746 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2748 def testBindInQueryDatasets(self):
2749 """Test that the bind parameter is correctly forwarded in
2750 queryDatasets recursion.
2751 """
2752 registry = self.makeRegistry()
2753 # Load the standard test datasets to query against.
2755 self.loadData(registry, "base.yaml")
2756 self.loadData(registry, "datasets.yaml")
2757 self.assertEqual(
2758 set(registry.queryDatasets("flat", band="r", collections=...)),
2759 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2760 )
2762 def testQueryIntRangeExpressions(self):
2763 """Test integer range expressions in ``where`` arguments.
2765 Note that our expressions use inclusive stop values, unlike Python's.
2766 """
2767 registry = self.makeRegistry()
2768 self.loadData(registry, "base.yaml")
2769 self.assertEqual(
2770 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2771 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2772 )
2773 self.assertEqual(
2774 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2775 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2776 )
2777 self.assertEqual(
2778 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2779 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2780 )
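def _sketch_int_range_expression(self, registry):
    """A minimal sketch (not exercised by the suite): the range expression
    ``(start..stop:stride)`` uses an inclusive stop, so it matches
    Python's ``range(start, stop + 1, stride)``.
    """
    data_ids = set(
        registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")
    )
    # The equivalent Python enumeration of the same detector values:
    detectors = list(range(1, 4 + 1, 2))  # [1, 3]
    return data_ids, detectors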
2782 def testQueryResultSummaries(self):
2783 """Test summary methods like `count`, `any`, and `explain_no_results`
2784 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2785 """
2786 registry = self.makeRegistry()
2787 self.loadData(registry, "base.yaml")
2788 self.loadData(registry, "datasets.yaml")
2789 self.loadData(registry, "spatial.yaml")
2790 # Default test dataset has two collections, each with both flats and
2791 # biases. Add a new collection with only biases.
2792 registry.registerCollection("biases", CollectionType.TAGGED)
2793 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2794 # First query yields two results, and involves no postprocessing.
2795 query1 = registry.queryDataIds(["physical_filter"], band="r")
2796 self.assertTrue(query1.any(execute=False, exact=False))
2797 self.assertTrue(query1.any(execute=True, exact=False))
2798 self.assertTrue(query1.any(execute=True, exact=True))
2799 self.assertEqual(query1.count(exact=False), 2)
2800 self.assertEqual(query1.count(exact=True), 2)
2801 self.assertFalse(list(query1.explain_no_results()))
2802 # Second query should yield no results, which the summary methods
2803 # below should report.
2804 query2 = registry.queryDataIds(["physical_filter"], band="h")
2805 # There's no execute=False, exact=False test here because the behavior
2806 # is not something we want to guarantee in this case (and exact=False
2807 # says either answer is legal).
2808 self.assertFalse(query2.any(execute=True, exact=False))
2809 self.assertFalse(query2.any(execute=True, exact=True))
2810 self.assertEqual(query2.count(exact=False), 0)
2811 self.assertEqual(query2.count(exact=True), 0)
2812 self.assertTrue(list(query2.explain_no_results()))
2813 # These queries yield no results due to various problems that can be
2814 # spotted prior to execution, yielding helpful diagnostics.
2815 base_query = registry.queryDataIds(["detector", "physical_filter"])
2816 queries_and_snippets = [
2817 (
2818 # Dataset type name doesn't match any existing dataset types.
2819 registry.queryDatasets("nonexistent", collections=...),
2820 ["nonexistent"],
2821 ),
2822 (
2823 # Dataset type object isn't registered.
2824 registry.queryDatasets(
2825 DatasetType(
2826 "nonexistent",
2827 dimensions=["instrument"],
2828 universe=registry.dimensions,
2829 storageClass="Image",
2830 ),
2831 collections=...,
2832 ),
2833 ["nonexistent"],
2834 ),
2835 (
2836 # No datasets of this type in this collection.
2837 registry.queryDatasets("flat", collections=["biases"]),
2838 ["flat", "biases"],
2839 ),
2840 (
2841 # No datasets of this type in this collection.
2842 base_query.findDatasets("flat", collections=["biases"]),
2843 ["flat", "biases"],
2844 ),
2845 (
2846 # No collections matching at all.
2847 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2848 ["potato"],
2849 ),
2850 ]
2851 with self.assertRaises(MissingDatasetTypeError):
2852 # Dataset type name doesn't match any existing dataset types.
2853 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
2854 with self.assertRaises(MissingDatasetTypeError):
2855 # Dataset type name doesn't match any existing dataset types.
2856 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
2857 for query, snippets in queries_and_snippets:
2858 self.assertFalse(query.any(execute=False, exact=False))
2859 self.assertFalse(query.any(execute=True, exact=False))
2860 self.assertFalse(query.any(execute=True, exact=True))
2861 self.assertEqual(query.count(exact=False), 0)
2862 self.assertEqual(query.count(exact=True), 0)
2863 messages = list(query.explain_no_results())
2864 self.assertTrue(messages)
2865 # Want all expected snippets to appear in at least one message.
2866 self.assertTrue(
2867 any(
2868 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2869 ),
2870 messages,
2871 )
2873 # Wildcards on dataset types are not permitted in queryDataIds.
2874 with self.assertRaises(DatasetTypeExpressionError):
2875 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2877 # These queries yield no results due to problems that can be identified
2878 # by cheap follow-up queries, yielding helpful diagnostics.
2879 for query, snippets in [
2880 (
2881 # No records for one of the involved dimensions.
2882 registry.queryDataIds(["subfilter"]),
2883 ["no rows", "subfilter"],
2884 ),
2885 (
2886 # No records for one of the involved dimensions.
2887 registry.queryDimensionRecords("subfilter"),
2888 ["no rows", "subfilter"],
2889 ),
2890 ]:
2891 self.assertFalse(query.any(execute=True, exact=False))
2892 self.assertFalse(query.any(execute=True, exact=True))
2893 self.assertEqual(query.count(exact=True), 0)
2894 messages = list(query.explain_no_results())
2895 self.assertTrue(messages)
2896 # Want all expected snippets to appear in at least one message.
2897 self.assertTrue(
2898 any(
2899 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2900 ),
2901 messages,
2902 )
2904 # This query yields four overlaps in the database, but one is filtered
2905 # out in postprocessing. The count queries aren't accurate because
2906 # they don't account for duplication that happens due to an internal
2907 # join against commonSkyPix.
2908 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2909 self.assertEqual(
2910 {
2911 DataCoordinate.standardize(
2912 instrument="Cam1",
2913 skymap="SkyMap1",
2914 visit=v,
2915 tract=t,
2916 universe=registry.dimensions,
2917 )
2918 for v, t in [(1, 0), (2, 0), (2, 1)]
2919 },
2920 set(query3),
2921 )
2922 self.assertTrue(query3.any(execute=False, exact=False))
2923 self.assertTrue(query3.any(execute=True, exact=False))
2924 self.assertTrue(query3.any(execute=True, exact=True))
2925 self.assertGreaterEqual(query3.count(exact=False), 4)
2926 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2927 self.assertFalse(list(query3.explain_no_results()))
2928 # This query yields overlaps in the database, but all are filtered
2929 # out in postprocessing. The count queries again aren't very useful.
2930 # We have to use `where=` here to avoid an optimization that
2931 # (currently) skips the spatial postprocess-filtering because it
2932 # recognizes that no spatial join is necessary. That's not ideal, but
2933 # fixing it is out of scope for this ticket.
2934 query4 = registry.queryDataIds(
2935 ["visit", "tract"],
2936 instrument="Cam1",
2937 skymap="SkyMap1",
2938 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2939 )
2940 self.assertFalse(set(query4))
2941 self.assertTrue(query4.any(execute=False, exact=False))
2942 self.assertTrue(query4.any(execute=True, exact=False))
2943 self.assertFalse(query4.any(execute=True, exact=True))
2944 self.assertGreaterEqual(query4.count(exact=False), 1)
2945 self.assertEqual(query4.count(exact=True, discard=True), 0)
2946 messages = list(query4.explain_no_results())
2947 self.assertTrue(messages)
2948 self.assertTrue(any("overlap" in message for message in messages))
2949 # This query should yield results from one dataset type but not the
2950 # other, which is not registered.
2951 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2952 self.assertTrue(set(query5))
2953 self.assertTrue(query5.any(execute=False, exact=False))
2954 self.assertTrue(query5.any(execute=True, exact=False))
2955 self.assertTrue(query5.any(execute=True, exact=True))
2956 self.assertGreaterEqual(query5.count(exact=False), 1)
2957 self.assertGreaterEqual(query5.count(exact=True), 1)
2958 self.assertFalse(list(query5.explain_no_results()))
2959 # This query applies a selection that yields no results, fully in the
2960 # database. Explaining why it fails involves traversing the relation
2961 # tree and running a LIMIT 1 query at each level that has the potential
2962 # to remove rows.
2963 query6 = registry.queryDimensionRecords(
2964 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2965 )
2966 self.assertEqual(query6.count(exact=True), 0)
2967 messages = list(query6.explain_no_results())
2968 self.assertTrue(messages)
2969 self.assertTrue(any("no-purpose" in message for message in messages))
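def _sketch_result_summaries(self, registry):
    """A minimal sketch (not exercised by the suite) of the summary-method
    idiom the assertions above exercise: cheap checks first, exact counts
    only when needed, and diagnostics only for empty results.
    """
    results = registry.queryDataIds(["physical_filter"], band="r")
    if not results.any(execute=True, exact=False):
        # Doomed or empty; the messages explain why.
        return list(results.explain_no_results())
    # exact=False may overcount (it skips postprocessing); exact=True with
    # discard=True pays the cost of fetching rows to be precise.
    return results.count(exact=False), results.count(exact=True, discard=True)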
2971 def testQueryDataIdsExpressionError(self):
2972 """Test error checking of 'where' expressions in queryDataIds."""
2973 registry = self.makeRegistry()
2974 self.loadData(registry, "base.yaml")
2975 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2976 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2977 registry.queryDataIds(["detector"], where="foo.bar = 12")
2978 with self.assertRaisesRegex(
2979 LookupError, "Dimension element name cannot be inferred in this context."
2980 ):
2981 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2983 def testQueryDataIdsOrderBy(self):
2984 """Test order_by and limit on result returned by queryDataIds()."""
2985 registry = self.makeRegistry()
2986 self.loadData(registry, "base.yaml")
2987 self.loadData(registry, "datasets.yaml")
2988 self.loadData(registry, "spatial.yaml")
2990 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2991 return registry.queryDataIds(
2992 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2993 )
2995 Test = namedtuple(
2996 "testQueryDataIdsOrderByTest",
2997 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2998 defaults=(None, None, None),
2999 )
3001 test_data = (
3002 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
3003 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
3004 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
3005 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
3006 Test(
3007 "tract.id,visit.id",
3008 "tract,visit",
3009 ((0, 1), (0, 1), (0, 2)),
3010 limit=(3,),
3011 ),
3012 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
3013 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
3014 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
3015 Test(
3016 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
3017 ),
3018 Test(
3019 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
3020 ),
3021 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
3022 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
3023 Test(
3024 "tract,-visit.timespan.begin,visit.timespan.end",
3025 "tract,visit",
3026 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
3027 ),
3028 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
3029 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
3030 Test(
3031 "tract,detector",
3032 "tract,detector",
3033 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
3034 datasets="flat",
3035 collections="imported_r",
3036 ),
3037 Test(
3038 "tract,detector.full_name",
3039 "tract,detector",
3040 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
3041 datasets="flat",
3042 collections="imported_r",
3043 ),
3044 Test(
3045 "tract,detector.raft,detector.name_in_raft",
3046 "tract,detector",
3047 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
3048 datasets="flat",
3049 collections="imported_r",
3050 ),
3051 )
3053 for test in test_data:
3054 order_by = test.order_by.split(",")
3055 keys = test.keys.split(",")
3056 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
3057 if test.limit is not None:
3058 query = query.limit(*test.limit)
3059 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
3060 self.assertEqual(dataIds, test.result)
3062 # Materializing a query with ORDER BY should fail.
3063 query = do_query(keys).order_by(*order_by)
3064 if test.limit is not None:
3065 query = query.limit(*test.limit)
3066 with self.assertRaises(RelationalAlgebraError):
3067 with query.materialize():
3068 pass
3070 # errors in a name
3071 for order_by in ("", "-"):
3072 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3073 list(do_query().order_by(order_by))
3075 for order_by in ("undimension.name", "-undimension.name"):
3076 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
3077 list(do_query().order_by(order_by))
3079 for order_by in ("attract", "-attract"):
3080 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
3081 list(do_query().order_by(order_by))
3083 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
3084 list(do_query(("exposure", "visit")).order_by("exposure_time"))
3086 with self.assertRaisesRegex(
3087 ValueError,
3088 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
3089 r"qualify timespan with specific dimension name\.",
3090 ):
3091 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3093 with self.assertRaisesRegex(
3094 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3095 ):
3096 list(do_query("tract").order_by("timespan.begin"))
3098 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3099 list(do_query("tract").order_by("tract.timespan.begin"))
3101 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3102 list(do_query("tract").order_by("tract.name"))
3104 with self.assertRaisesRegex(
3105 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3106 ):
3107 list(do_query("visit").order_by("timestamp.begin"))
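def _sketch_order_by_limit(self, registry):
    """A minimal sketch (not exercised by the suite): ORDER BY keys may be
    dimension names, ``element.field`` references, or timespan bounds,
    each with an optional leading ``-`` for descending order, and
    ``limit(limit, offset)`` paginates the sorted rows.
    """
    query = (
        registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        .order_by("tract", "-visit.timespan.begin")
        .limit(3, 0)
    )
    return [(data_id["tract"], data_id["visit"]) for data_id in query]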
3109 def testQueryDataIdsGovernorExceptions(self):
3110 """Test exceptions raised by queryDataIds() for incorrect governors."""
3111 registry = self.makeRegistry()
3112 self.loadData(registry, "base.yaml")
3113 self.loadData(registry, "datasets.yaml")
3114 self.loadData(registry, "spatial.yaml")
3116 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3117 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3119 Test = namedtuple(
3120 "testQueryDataIdExceptionsTest",
3121 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3122 defaults=(None, None, None, {}, None, 0),
3123 )
3125 test_data = (
3126 Test("tract,visit", count=6),
3127 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3128 Test(
3129 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3130 ),
3131 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3132 Test(
3133 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3134 ),
3135 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3136 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3137 Test(
3138 "tract,visit",
3139 where="instrument=cam AND skymap=map",
3140 bind={"cam": "Cam1", "map": "SkyMap1"},
3141 count=6,
3142 ),
3143 Test(
3144 "tract,visit",
3145 where="instrument=cam AND skymap=map",
3146 bind={"cam": "Cam", "map": "SkyMap"},
3147 exception=DataIdValueError,
3148 ),
3149 )
3151 for test in test_data:
3152 dimensions = test.dimensions.split(",")
3153 if test.exception:
3154 with self.assertRaises(test.exception):
3155 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3156 else:
3157 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3158 self.assertEqual(query.count(discard=True), test.count)
3160 # and materialize
3161 if test.exception:
3162 with self.assertRaises(test.exception):
3163 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3164 with query.materialize() as materialized:
3165 materialized.count(discard=True)
3166 else:
3167 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3168 with query.materialize() as materialized:
3169 self.assertEqual(materialized.count(discard=True), test.count)
3171 def testQueryDimensionRecordsOrderBy(self):
3172 """Test order_by and limit on result returned by
3173 queryDimensionRecords().
3174 """
3175 registry = self.makeRegistry()
3176 self.loadData(registry, "base.yaml")
3177 self.loadData(registry, "datasets.yaml")
3178 self.loadData(registry, "spatial.yaml")
3180 def do_query(element, datasets=None, collections=None):
3181 return registry.queryDimensionRecords(
3182 element, instrument="Cam1", datasets=datasets, collections=collections
3183 )
3185 query = do_query("detector")
3186 self.assertEqual(len(list(query)), 4)
3188 Test = namedtuple(
3189 "testQueryDataIdsOrderByTest",
3190 ("element", "order_by", "result", "limit", "datasets", "collections"),
3191 defaults=(None, None, None),
3192 )
3194 test_data = (
3195 Test("detector", "detector", (1, 2, 3, 4)),
3196 Test("detector", "-detector", (4, 3, 2, 1)),
3197 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3198 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3199 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3200 Test("visit", "visit", (1, 2)),
3201 Test("visit", "-visit.id", (2, 1)),
3202 Test("visit", "zenith_angle", (1, 2)),
3203 Test("visit", "-visit.name", (2, 1)),
3204 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3205 )
3207 for test in test_data:
3208 order_by = test.order_by.split(",")
3209 query = do_query(test.element).order_by(*order_by)
3210 if test.limit is not None:
3211 query = query.limit(*test.limit)
3212 dataIds = tuple(rec.id for rec in query)
3213 self.assertEqual(dataIds, test.result)
3215 # errors in a name
3216 for order_by in ("", "-"):
3217 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3218 list(do_query("detector").order_by(order_by))
3220 for order_by in ("undimension.name", "-undimension.name"):
3221 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3222 list(do_query("detector").order_by(order_by))
3224 for order_by in ("attract", "-attract"):
3225 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3226 list(do_query("detector").order_by(order_by))
3228 for order_by in ("timestamp.begin", "-timestamp.begin"):
3229 with self.assertRaisesRegex(
3230 ValueError,
3231 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3232 r"perhaps you meant 'timespan.begin'\?",
3233 ):
3234 list(do_query("visit").order_by(order_by))
3236 def testQueryDimensionRecordsExceptions(self):
3237 """Test exceptions raised by queryDimensionRecords()."""
3238 registry = self.makeRegistry()
3239 self.loadData(registry, "base.yaml")
3240 self.loadData(registry, "datasets.yaml")
3241 self.loadData(registry, "spatial.yaml")
3243 result = registry.queryDimensionRecords("detector")
3244 self.assertEqual(result.count(), 4)
3245 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3246 self.assertEqual(result.count(), 4)
3247 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3248 self.assertEqual(result.count(), 4)
3249 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3250 self.assertEqual(result.count(), 4)
3251 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3252 self.assertEqual(result.count(), 4)
3254 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3255 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3256 result.count()
3258 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3259 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3260 result.count()
3262 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3263 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3264 result.count()
3266 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3267 result = registry.queryDimensionRecords(
3268 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3269 )
3270 result.count()
3272 def testDatasetConstrainedDimensionRecordQueries(self):
3273 """Test that queryDimensionRecords works even when given a dataset
3274 constraint whose dimensions extend beyond the requested dimension
3275 element's.
3276 """
3277 registry = self.makeRegistry()
3278 self.loadData(registry, "base.yaml")
3279 self.loadData(registry, "datasets.yaml")
3280 # Query for physical_filter dimension records, using a dataset type
3281 # whose dimensions extend beyond physical_filter's.
3282 records = registry.queryDimensionRecords(
3283 "physical_filter",
3284 datasets=["flat"],
3285 collections="imported_r",
3286 )
3287 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3288 # Trying to constrain by all dataset types is an error.
3289 with self.assertRaises(TypeError):
3290 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3292 def testSkyPixDatasetQueries(self):
3293 """Test that we can build queries involving skypix dimensions as long
3294 as a dataset type that uses those dimensions is included.
3295 """
3296 registry = self.makeRegistry()
3297 self.loadData(registry, "base.yaml")
3298 dataset_type = DatasetType(
3299 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3300 )
3301 registry.registerDatasetType(dataset_type)
3302 run = "r"
3303 registry.registerRun(run)
3304 # First try queries where there are no datasets; the concern is whether
3305 # we can even build and execute these queries without raising, even
3306 # when "doomed" query shortcuts are in play.
3307 self.assertFalse(
3308 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3309 )
3310 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3311 # Now add a dataset and see that we can get it back.
3312 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3313 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3314 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3315 self.assertEqual(
3316 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3317 {data_id},
3318 )
3319 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3321 def testDatasetIdFactory(self):
3322 """Simple test for DatasetIdFactory, mostly to catch potential changes
3323 in its API.
3324 """
3325 registry = self.makeRegistry()
3326 factory = DatasetIdFactory()
3327 dataset_type = DatasetType(
3328 "datasetType",
3329 dimensions=["detector", "instrument"],
3330 universe=registry.dimensions,
3331 storageClass="int",
3332 )
3333 run = "run"
3334 data_id = DataCoordinate.standardize(
3335 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
3336 )
3338 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3339 self.assertIsInstance(datasetId, uuid.UUID)
3340 self.assertEqual(datasetId.version, 4)
3342 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3343 self.assertIsInstance(datasetId, uuid.UUID)
3344 self.assertEqual(datasetId.version, 5)
3346 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3347 self.assertIsInstance(datasetId, uuid.UUID)
3348 self.assertEqual(datasetId.version, 5)
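def _sketch_dataset_id_determinism(self, factory, run, dataset_type, data_id):
    """A minimal sketch (not exercised by the suite): version-5 UUIDs are
    name-based, so the DATAID_TYPE* modes should be reproducible for
    identical inputs, while UNIQUE draws a fresh random version-4 UUID on
    each call.  (The suite above only checks the version numbers; the
    reproducibility claim follows from UUID5 semantics.)
    """
    a = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    b = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    return a == b  # expected: True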
3350 def testExposureQueries(self):
3351 """Test query methods using arguments sourced from the exposure log
3352 service.
3354 The most complete test dataset currently available to daf_butler tests
3355 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3356 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3357 dimension records as it was focused on providing nontrivial spatial
3358 overlaps between visit+detector and tract+patch. So in this test we
3359 need to translate queries that originally used the exposure dimension
3360 to use the (very similar) visit dimension instead.
3361 """
3362 registry = self.makeRegistry()
3363 self.loadData(registry, "hsc-rc2-subset.yaml")
3364 self.assertEqual(
3365 [
3366 record.id
3367 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3368 .order_by("id")
3369 .limit(5)
3370 ],
3371 [318, 322, 326, 330, 332],
3372 )
3373 self.assertEqual(
3374 [
3375 data_id["visit"]
3376 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
3377 ],
3378 [318, 322, 326, 330, 332],
3379 )
3380 self.assertEqual(
3381 [
3382 record.id
3383 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3384 .order_by("full_name")
3385 .limit(5)
3386 ],
3387 [73, 72, 71, 70, 65],
3388 )
3389 self.assertEqual(
3390 [
3391 data_id["detector"]
3392 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3393 .order_by("full_name")
3394 .limit(5)
3395 ],
3396 [73, 72, 71, 70, 65],
3397 )
3399 def test_long_query_names(self) -> None:
3400 """Test that queries involving very long names are handled correctly.
3402 This is especially important for PostgreSQL, which truncates identifiers
3403 longer than 63 characters, but it's worth testing for all DBs.
3404 """
3405 registry = self.makeRegistry()
3406 name = "abcd" * 17
3407 registry.registerDatasetType(
3408 DatasetType(
3409 name,
3410 dimensions=(),
3411 storageClass="Exposure",
3412 universe=registry.dimensions,
3413 )
3414 )
3415 # Need to search more than one collection actually containing a
3416 # matching dataset to avoid optimizations that sidestep bugs due to
3417 # truncation by making findFirst=True a no-op.
3418 run1 = "run1"
3419 registry.registerRun(run1)
3420 run2 = "run2"
3421 registry.registerRun(run2)
3422 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
3423 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
3424 self.assertEqual(
3425 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3426 {ref1},
3427 )
3429 def test_skypix_constraint_queries(self) -> None:
3430 """Test queries spatially constrained by a skypix data ID."""
3431 registry = self.makeRegistry()
3432 self.loadData(registry, "hsc-rc2-subset.yaml")
3433 patch_regions = {
3434 (data_id["tract"], data_id["patch"]): data_id.region
3435 for data_id in registry.queryDataIds(["patch"]).expanded()
3436 }
3437 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3438 # This check ensures the test doesn't become trivial due to a config
3439 # change; if it does, just pick a different HTM level.
3440 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3441 # Gather all skypix IDs that definitely overlap at least one of these
3442 # patches.
3443 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3444 for patch_region in patch_regions.values():
3445 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3446 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3447 # and does not overlap at least one other patch.
3448 for skypix_id in itertools.chain.from_iterable(
3449 range(begin, end) for begin, end in relevant_skypix_ids
3450 ):
3451 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3452 overlapping_patches = {
3453 patch_key
3454 for patch_key, patch_region in patch_regions.items()
3455 if not patch_region.isDisjointFrom(skypix_region)
3456 }
3457 if overlapping_patches and overlapping_patches != patch_regions.keys():
3458 break
3459 else:
3460 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3461 self.assertEqual(
3462 {
3463 (data_id["tract"], data_id["patch"])
3464 for data_id in registry.queryDataIds(
3465 ["patch"],
3466 dataId={skypix_dimension.name: skypix_id},
3467 )
3468 },
3469 overlapping_patches,
3470 )
3471 # Test that a three-way join that includes the common skypix system in
3472 # the dimensions doesn't generate redundant join terms in the query.
3473 full_data_ids = set(
3474 registry.queryDataIds(
3475 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3476 ).expanded()
3477 )
3478 self.assertGreater(len(full_data_ids), 0)
3479 for data_id in full_data_ids:
3480 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3481 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
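def _sketch_skypix_enumeration(self, registry, region):
    """A minimal sketch (not exercised by the suite) of the enumeration
    idiom used above: ``interior`` returns a `lsst.sphgeom.RangeSet` of
    half-open ``(begin, end)`` index ranges whose pixels lie inside the
    region, which can be flattened into individual skypix IDs.
    """
    pixelization = registry.dimensions["htm11"].pixelization
    ranges = pixelization.interior(region)
    return list(itertools.chain.from_iterable(range(begin, end) for begin, end in ranges))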
3483 def test_spatial_constraint_queries(self) -> None:
3484 """Test queries in which one spatial dimension in the constraint (data
3485 ID or ``where`` string) constrains a different spatial dimension in the
3486 query result columns.
3487 """
3488 registry = self.makeRegistry()
3489 self.loadData(registry, "hsc-rc2-subset.yaml")
3490 patch_regions = {
3491 (data_id["tract"], data_id["patch"]): data_id.region
3492 for data_id in registry.queryDataIds(["patch"]).expanded()
3493 }
3494 observation_regions = {
3495 (data_id["visit"], data_id["detector"]): data_id.region
3496 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3497 }
3498 all_combos = {
3499 (patch_key, observation_key)
3500 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3501 }
3502 overlapping_combos = {
3503 (patch_key, observation_key)
3504 for patch_key, observation_key in all_combos
3505 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3506 }
3507 # Check a direct spatial join with no constraint first.
3508 self.assertEqual(
3509 {
3510 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3511 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3512 },
3513 overlapping_combos,
3514 )
3515 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3516 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3517 for patch_key, observation_key in overlapping_combos:
3518 overlaps_by_patch[patch_key].add(observation_key)
3519 overlaps_by_observation[observation_key].add(patch_key)
3520 # Find patches and observations that overlap at least one of the other
3521 # kind but not all of them.
3522 nontrivial_patch = next(
3523 iter(
3524 patch_key
3525 for patch_key, observation_keys in overlaps_by_patch.items()
3526 if observation_keys and observation_keys != observation_regions.keys()
3527 )
3528 )
3529 nontrivial_observation = next(
3530 iter(
3531 observation_key
3532 for observation_key, patch_keys in overlaps_by_observation.items()
3533 if patch_keys and patch_keys != patch_regions.keys()
3534 )
3535 )
3536 # Use the nontrivial patches and observations as constraints on the
3537 # other dimensions in various ways, first via a 'where' expression.
3538 # It's better in general to use 'bind' instead of f-strings, but these
3539 # are all integers so there are no quoting concerns.
3540 self.assertEqual(
3541 {
3542 (data_id["visit"], data_id["detector"])
3543 for data_id in registry.queryDataIds(
3544 ["visit", "detector"],
3545 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3546 skymap="hsc_rings_v1",
3547 )
3548 },
3549 overlaps_by_patch[nontrivial_patch],
3550 )
3551 self.assertEqual(
3552 {
3553 (data_id["tract"], data_id["patch"])
3554 for data_id in registry.queryDataIds(
3555 ["patch"],
3556 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3557 instrument="HSC",
3558 )
3559 },
3560 overlaps_by_observation[nontrivial_observation],
3561 )
3562 # and then via the dataId argument.
3563 self.assertEqual(
3564 {
3565 (data_id["visit"], data_id["detector"])
3566 for data_id in registry.queryDataIds(
3567 ["visit", "detector"],
3568 dataId={
3569 "tract": nontrivial_patch[0],
3570 "patch": nontrivial_patch[1],
3571 },
3572 skymap="hsc_rings_v1",
3573 )
3574 },
3575 overlaps_by_patch[nontrivial_patch],
3576 )
3577 self.assertEqual(
3578 {
3579 (data_id["tract"], data_id["patch"])
3580 for data_id in registry.queryDataIds(
3581 ["patch"],
3582 dataId={
3583 "visit": nontrivial_observation[0],
3584 "detector": nontrivial_observation[1],
3585 },
3586 instrument="HSC",
3587 )
3588 },
3589 overlaps_by_observation[nontrivial_observation],
3590 )
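def _sketch_spatial_constraint(self, registry, tract, patch):
    """A minimal sketch (not exercised by the suite) of the idiom tested
    above: a data ID for one spatial dimension (patch) constrains results
    for a different spatial dimension (visit+detector) via the overlap
    tables; ``hsc_rings_v1`` is the skymap loaded by this test.
    """
    return {
        (data_id["visit"], data_id["detector"])
        for data_id in registry.queryDataIds(
            ["visit", "detector"],
            dataId={"tract": tract, "patch": patch},
            skymap="hsc_rings_v1",
        )
    }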
3592 def test_query_projection_drop_postprocessing(self) -> None:
3593 """Test that projections and deduplications on query objects can
3594 drop post-query region filtering to ensure the query remains in
3595 the SQL engine.
3596 """
3597 registry = self.makeRegistry()
3598 self.loadData(registry, "base.yaml")
3599 self.loadData(registry, "spatial.yaml")
3601 def pop_transfer(tree: Relation) -> Relation:
3602 """If a relation tree terminates with a transfer to a new engine,
3603 return the relation prior to that transfer. If not, return the
3604 original relation.
3606 Parameters
3607 ----------
3608 tree : `Relation`
3609 The relation tree to inspect.
3610 """
3611 match tree:
3612 case Transfer(target=target):
3613 return target
3614 case _:
3615 return tree
3617 # There's no public way to get a Query object yet, so we get one from a
3618 # DataCoordinateQueryResults private attribute. When a public API is
3619 # available this test should use it.
3620 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3621 # We expect this query to terminate in the iteration engine originally,
3622 # because region-filtering is necessary.
3623 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3624 # If we deduplicate, we usually have to do that downstream of the
3625 # filtering. That means the deduplication has to happen in the
3626 # iteration engine.
3627 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3628 # If we pass drop_postprocessing, we instead drop the region filtering
3629 # so the deduplication can happen in SQL (though there might still be
3630 # transfer to iteration at the tail of the tree that we can ignore;
3631 # that's what the pop_transfer takes care of here).
3632 self.assertIsInstance(
3633 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3634 sql.Engine,
3635 )
3637 def test_query_find_datasets_drop_postprocessing(self) -> None:
3638 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3639 problems with the FindFirstDataset relation operation.
3640 """
3641 # Setup: load some visit, tract, and patch records, and insert two
3642 # datasets with dimensions {visit, patch}, with one in each of two
3643 # RUN collections.
3644 registry = self.makeRegistry()
3645 self.loadData(registry, "base.yaml")
3646 self.loadData(registry, "spatial.yaml")
3647 storage_class = StorageClass("Warpy")
3648 registry.storageClasses.registerStorageClass(storage_class)
3649 dataset_type = DatasetType(
3650 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3651 )
3652 registry.registerDatasetType(dataset_type)
3653 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3654 registry.registerRun("run1")
3655 registry.registerRun("run2")
3656 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3657 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3658 # Query for the dataset using queryDataIds(...).findDatasets(...)
3659 # against only one of the two collections. This should work even
3660 # though the relation returned by queryDataIds ends with
3661 # iteration-engine region-filtering, because we can recognize before
3662 # running the query that there is only one collection to search and
3663 # hence the (default) findFirst=True is irrelevant, and joining in the
3664 # dataset query commutes past the iteration-engine postprocessing.
3665 query1 = registry.queryDataIds(
3666 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3667 )
3668 self.assertEqual(
3669 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3670 {ref1},
3671 )
3672 # Query for the dataset using queryDataIds(...).findDatasets(...)
3673 # against both collections. This can only work if the FindFirstDataset
3674 # operation can be commuted past the iteration-engine operations into SQL.
3675 query2 = registry.queryDataIds(
3676 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3677 )
3678 self.assertEqual(
3679 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3680 {ref2},
3681 )
3683 def test_query_empty_collections(self) -> None:
3684 """Test for registry query methods with empty collections. The methods
3685 should return an empty result set (or None when applicable) and provide
3686 "doomed" diagnostics.
3687 """
3688 registry = self.makeRegistry()
3689 self.loadData(registry, "base.yaml")
3690 self.loadData(registry, "datasets.yaml")
3692 # Tests for registry.findDataset()
3693 with self.assertRaises(NoDefaultCollectionError):
3694 registry.findDataset("bias", instrument="Cam1", detector=1)
3695 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3696 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3698 # Tests for registry.queryDatasets()
3699 with self.assertRaises(NoDefaultCollectionError):
3700 registry.queryDatasets("bias")
3701 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3703 result = registry.queryDatasets("bias", collections=[])
3704 self.assertEqual(len(list(result)), 0)
3705 messages = list(result.explain_no_results())
3706 self.assertTrue(messages)
3707 self.assertTrue(any("because collection list is empty" in message for message in messages))
3709 # Tests for registry.queryDataIds()
3710 with self.assertRaises(NoDefaultCollectionError):
3711 registry.queryDataIds("detector", datasets="bias")
3712 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3714 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3715 self.assertEqual(len(list(result)), 0)
3716 messages = list(result.explain_no_results())
3717 self.assertTrue(messages)
3718 self.assertTrue(any("because collection list is empty" in message for message in messages))
3720 # Tests for registry.queryDimensionRecords()
3721 with self.assertRaises(NoDefaultCollectionError):
3722 registry.queryDimensionRecords("detector", datasets="bias")
3723 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3725 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3726 self.assertEqual(len(list(result)), 0)
3727 messages = list(result.explain_no_results())
3728 self.assertTrue(messages)
3729 self.assertTrue(any("because collection list is empty" in message for message in messages))
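def _sketch_empty_collection_diagnostics(self, registry):
    """A minimal sketch (not exercised by the suite) of the pattern the
    assertions above check: an empty collection list is not an error, but
    every result is doomed and ``explain_no_results`` says why.
    """
    result = registry.queryDatasets("bias", collections=[])
    refs = list(result)  # always empty
    reasons = list(result.explain_no_results())
    return refs, reasons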
3731 def test_dataset_followup_spatial_joins(self) -> None:
3732 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3733 is involved.
3734 """
3735 registry = self.makeRegistry()
3736 self.loadData(registry, "base.yaml")
3737 self.loadData(registry, "spatial.yaml")
3738 pvi_dataset_type = DatasetType(
3739 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3740 )
3741 registry.registerDatasetType(pvi_dataset_type)
3742 collection = "datasets"
3743 registry.registerRun(collection)
3744 (pvi1,) = registry.insertDatasets(
3745 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3746 )
3747 (pvi2,) = registry.insertDatasets(
3748 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3749 )
3750 (pvi3,) = registry.insertDatasets(
3751 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3752 )
3753 self.assertEqual(
3754 set(
3755 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3756 .expanded()
3757 .findRelatedDatasets("pvi", [collection])
3758 ),
3759 {
3760 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3761 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3762 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3763 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3764 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3765 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3766 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3767 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3768 },
3769 )