# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import CollectionTypeError, MissingCollectionError, MissingDatasetTypeError
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class; if a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()
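
    # A concrete subclass might implement getDataDir along these lines
    # (sketch only; the relative path below is hypothetical, not something
    # this base class prescribes):
    #
    #     @classmethod
    #     def getDataDir(cls) -> str:
    #         return os.path.normpath(
    #             os.path.join(os.path.dirname(__file__), "data", "registry")
    #         )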

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters.  Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
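
    # For example, a subclass could pin a specific manager implementation by
    # overriding the class members above (illustrative sketch; the manager
    # class path is an assumption, not required by this base class):
    #
    #     class MyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"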

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
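
    # A sketch of what a SQLite-backed override could look like; the helper
    # used to turn the config into a registry is hypothetical here and left
    # to the subclass:
    #
    #     def makeRegistry(self, share_repo_with=None):
    #         if share_repo_with is not None:
    #             return None  # an in-memory DB cannot be shared
    #         config = self.makeRegistryConfig()
    #         config["db"] = "sqlite://"
    #         return self._createRegistry(config)  # hypothetical helper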

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
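
    # Typical use in the tests below:
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")
    #     self.loadData(registry, "datasets.yaml")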

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
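
    # A sketch of the intended call shape (the expected values are
    # test-specific):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected=[...],  # DataCoordinate / DatasetRef objects
    #     )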

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause which exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique (reproducible) IDs; they can be re-imported
        # multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run + 1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run + 1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry; this test checks
        that such lookups now fail.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regexes.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but
            # before inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section
                # and is waiting for us to release it.  Start the other
                # thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1
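
    # Rough timeline of the barrier handshake above (comment sketch, derived
    # from the code in this method):
    #
    #   worker 1: enters the critical section, then waits on enter_barrier
    #   main:     passes enter_barrier, submits worker 2, sleeps briefly
    #   worker 2: blocks on the database row lock held by worker 1
    #   main:     waits on exit_barrier, releasing worker 1 to finish its
    #             transaction; the row lock is freed and worker 2 proceeds.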

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1324 def testSpatialJoin(self):
1325 """Test queries that involve spatial overlap joins."""
1326 registry = self.makeRegistry()
1327 self.loadData(registry, "hsc-rc2-subset.yaml")
1329 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1330 # the TopologicalFamily they belong to. We'll relate all elements in
1331 # each family to all of the elements in each other family.
1332 families = defaultdict(set)
1333 # Dictionary of {element.name: {dataId: region}}.
1334 regions = {}
1335 for element in registry.dimensions.database_elements:
1336 if element.spatial is not None:
1337 families[element.spatial.name].add(element)
1338 regions[element.name] = {
1339 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1340 }
1342 # If this check fails, it's not necessarily a problem - it may just be
1343 # a reasonable change to the default dimension definitions - but the
1344 # test below depends on there being more than one family to do anything
1345 # useful.
1346 self.assertEqual(len(families), 2)
1348 # Overlap DatabaseDimensionElements with each other.
1349 for family1, family2 in itertools.combinations(families, 2):
1350 for element1, element2 in itertools.product(families[family1], families[family2]):
1351 dimensions = element1.minimal_group | element2.minimal_group
1352 # Construct expected set of overlapping data IDs via a
1353 # brute-force comparison of the regions we've already fetched.
1354 expected = {
1355 DataCoordinate.standardize(
1356 {**dataId1.required, **dataId2.required}, dimensions=dimensions
1357 )
1358 for (dataId1, region1), (dataId2, region2) in itertools.product(
1359 regions[element1.name].items(), regions[element2.name].items()
1360 )
1361 if not region1.isDisjointFrom(region2)
1362 }
1363 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1364 queried = set(registry.queryDataIds(dimensions))
1365 self.assertEqual(expected, queried)
1367 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1368 commonSkyPix = registry.dimensions.commonSkyPix
1369 for elementName, these_regions in regions.items():
1370 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
1371 expected = set()
1372 for dataId, region in these_regions.items():
1373 for begin, end in commonSkyPix.pixelization.envelope(region):
1374 expected.update(
1375 DataCoordinate.standardize(
1376 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
1377 )
1378 for index in range(begin, end)
1379 )
1380 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1381 queried = set(registry.queryDataIds(dimensions))
1382 self.assertEqual(expected, queried)
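# A minimal standalone sketch (not exercised by the tests above) of the
# lsst.sphgeom primitives this spatial-join test builds on; the HTM level
# and trixel index below are arbitrary assumptions chosen for illustration.
@staticmethod
def _sketchSphgeomOverlap() -> bool:
    pixelization = lsst.sphgeom.HtmPixelization(6)
    trixel = pixelization.triangle(49152)  # an arbitrary valid level-6 trixel
    # A trixel is contained in its own bounding circle, so the two regions
    # are guaranteed to overlap; "not disjoint" is the same predicate the
    # brute-force expected-set construction above uses.
    overlaps = not trixel.isDisjointFrom(trixel.getBoundingCircle())
    # envelope() returns a RangeSet of trixel index ranges that may overlap
    # a region; this is how skypix constraints are derived from regions.
    ranges = list(pixelization.envelope(trixel))
    return overlaps and bool(ranges)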
1384 def testAbstractQuery(self):
1385 """Test that we can run a query that just lists the known
1386 bands. This is tricky because band is
1387 backed by a query against physical_filter.
1388 """
1389 registry = self.makeRegistry()
1390 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1391 registry.insertDimensionData(
1392 "physical_filter",
1393 dict(instrument="DummyCam", name="dummy_i", band="i"),
1394 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1395 dict(instrument="DummyCam", name="dummy_r", band="r"),
1396 )
1397 rows = registry.queryDataIds(["band"]).toSet()
1398 self.assertCountEqual(
1399 rows,
1400 [
1401 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1402 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1403 ],
1404 )
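# Hedged sketch (plain Python, not the registry implementation): the band
# query above is effectively a deduplicating projection over the
# physical_filter records, so dummy_i and dummy_i2 collapse to a single
# "i" data ID.
@staticmethod
def _sketchBandProjection(physical_filters: list[dict]) -> set[str]:
    # Each physical_filter record carries a band; projecting to "band"
    # alone deduplicates the values.
    return {record["band"] for record in physical_filters}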
1406 def testAttributeManager(self):
1407 """Test basic functionality of attribute manager."""
1408 # Number of attributes with schema versions in a fresh database:
1409 # 6 managers with 2 records per manager, plus config for dimensions.
1410 VERSION_COUNT = 6 * 2 + 1
1412 registry = self.makeRegistry()
1413 attributes = registry._managers.attributes
1415 # check what get() returns for non-existing key
1416 self.assertIsNone(attributes.get("attr"))
1417 self.assertEqual(attributes.get("attr", ""), "")
1418 self.assertEqual(attributes.get("attr", "Value"), "Value")
1419 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1421 # cannot store empty key or value
1422 with self.assertRaises(ValueError):
1423 attributes.set("", "value")
1424 with self.assertRaises(ValueError):
1425 attributes.set("attr", "")
1427 # set value of non-existing key
1428 attributes.set("attr", "value")
1429 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1430 self.assertEqual(attributes.get("attr"), "value")
1432 # update value of existing key
1433 with self.assertRaises(ButlerAttributeExistsError):
1434 attributes.set("attr", "value2")
1436 attributes.set("attr", "value2", force=True)
1437 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1438 self.assertEqual(attributes.get("attr"), "value2")
1440 # delete existing key
1441 self.assertTrue(attributes.delete("attr"))
1442 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1444 # delete non-existing key
1445 self.assertFalse(attributes.delete("non-attr"))
1447 # store a bunch of keys and get the list back
1448 data = [
1449 ("version.core", "1.2.3"),
1450 ("version.dimensions", "3.2.1"),
1451 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1452 ]
1453 for key, value in data:
1454 attributes.set(key, value)
1455 items = dict(attributes.items())
1456 for key, value in data:
1457 self.assertEqual(items[key], value)
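# Hedged sketch (a hypothetical helper, not part of the attribute manager
# API): reading related attributes back as a mapping filtered by key
# prefix, using only the items() iteration exercised above.
@staticmethod
def _sketchAttributesByPrefix(attributes, prefix: str) -> dict[str, str]:
    return {key: value for key, value in attributes.items() if key.startswith(prefix)}
# For example, _sketchAttributesByPrefix(attributes, "version.") would
# return the two "version.*" entries stored above.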
1459 def testQueryDatasetsDeduplication(self):
1460 """Test that the findFirst option to queryDatasets selects datasets
1461 from collections in the order given.
1462 """
1463 registry = self.makeRegistry()
1464 self.loadData(registry, "base.yaml")
1465 self.loadData(registry, "datasets.yaml")
1466 self.assertCountEqual(
1467 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1468 [
1469 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1470 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1471 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1472 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1473 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1474 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1475 ],
1476 )
1477 self.assertCountEqual(
1478 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1479 [
1480 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1481 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1482 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1483 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1484 ],
1485 )
1486 self.assertCountEqual(
1487 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1488 [
1489 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1490 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1491 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1492 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1493 ],
1494 )
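# Hedged pure-Python model (not the registry implementation) of the
# findFirst semantics asserted above: each data ID resolves to the ref from
# the first collection in the search order that contains it, which is why
# reversing the collection order flips detectors 2 and 3 between runs.
@staticmethod
def _sketchFindFirst(
    refs_by_collection: dict[str, dict[int, str]], order: list[str]
) -> dict[int, str]:
    resolved: dict[int, str] = {}
    for collection in order:
        for detector, ref in refs_by_collection[collection].items():
            # setdefault keeps the first match and ignores later ones.
            resolved.setdefault(detector, ref)
    return resolved
# With imported_g holding detectors {1, 2, 3} and imported_r holding
# {2, 3, 4}, the order ["imported_g", "imported_r"] resolves 2 and 3 to
# imported_g, while the reversed order resolves them to imported_r.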
1496 def testQueryResults(self):
1497 """Test querying for data IDs and then manipulating the QueryResults
1498 object returned to perform other queries.
1499 """
1500 registry = self.makeRegistry()
1501 self.loadData(registry, "base.yaml")
1502 self.loadData(registry, "datasets.yaml")
1503 bias = registry.getDatasetType("bias")
1504 flat = registry.getDatasetType("flat")
1505 # Obtain expected results from methods other than those we're testing
1506 # here. That includes:
1507 # - the dimensions of the data IDs we want to query:
1508 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1509 # - the dimensions of some other data IDs we'll extract from that:
1510 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1511 # - the data IDs we expect to obtain from the first queries:
1512 expectedDataIds = DataCoordinateSet(
1513 {
1514 DataCoordinate.standardize(
1515 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1516 )
1517 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1518 },
1519 dimensions=expected_dimensions,
1520 hasFull=False,
1521 hasRecords=False,
1522 )
1523 # - the flat datasets we expect to find from those data IDs, in just
1524 # one collection (so deduplication is irrelevant):
1525 expectedFlats = [
1526 registry.findDataset(
1527 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1528 ),
1529 registry.findDataset(
1530 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1531 ),
1532 registry.findDataset(
1533 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1534 ),
1535 ]
1536 # - the data IDs we expect to extract from that:
1537 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1538 # - the bias datasets we expect to find from those data IDs, after we
1539 # subset out the physical_filter dimension, both with duplicates:
1540 expectedAllBiases = [
1541 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1542 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1543 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1544 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1545 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1546 ]
1547 # - ...and without duplicates:
1548 expectedDeduplicatedBiases = [
1549 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1550 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1551 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1552 ]
1553 # Test against those expected results, using a "lazy" query for the
1554 # data IDs (which re-executes that query each time we use it to do
1555 # something new).
1556 dataIds = registry.queryDataIds(
1557 ["detector", "physical_filter"],
1558 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1559 instrument="Cam1",
1560 )
1561 self.assertEqual(dataIds.dimensions, expected_dimensions)
1562 self.assertEqual(dataIds.toSet(), expectedDataIds)
1563 self.assertCountEqual(
1564 list(
1565 dataIds.findDatasets(
1566 flat,
1567 collections=["imported_r"],
1568 )
1569 ),
1570 expectedFlats,
1571 )
1572 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1573 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1574 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1575 self.assertCountEqual(
1576 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1577 expectedAllBiases,
1578 )
1579 self.assertCountEqual(
1580 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1581 expectedDeduplicatedBiases,
1582 )
1584 # Searching for a dataset with dimensions we had projected away
1585 # restores those dimensions.
1586 self.assertCountEqual(
1587 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1588 expectedFlats,
1589 )
1591 # Use a named dataset type that does not exist and a dataset type
1592 # object that does not exist.
1593 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1595 # Test both string name and dataset type object.
1596 test_type: str | DatasetType
1597 for test_type, test_type_name in (
1598 (unknown_type, unknown_type.name),
1599 (unknown_type.name, unknown_type.name),
1600 ):
1601 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1602 list(
1603 subsetDataIds.findDatasets(
1604 test_type, collections=["imported_r", "imported_g"], findFirst=True
1605 )
1606 )
1608 # Materialize the bias dataset queries (only) by putting the results
1609 # into temporary tables, then repeat those tests.
1610 with subsetDataIds.findDatasets(
1611 bias, collections=["imported_r", "imported_g"], findFirst=False
1612 ).materialize() as biases:
1613 self.assertCountEqual(list(biases), expectedAllBiases)
1614 with subsetDataIds.findDatasets(
1615 bias, collections=["imported_r", "imported_g"], findFirst=True
1616 ).materialize() as biases:
1617 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1618 # Materialize the data ID subset query, but not the dataset queries.
1619 with subsetDataIds.materialize() as subsetDataIds:
1620 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1621 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1622 self.assertCountEqual(
1623 list(
1624 subsetDataIds.findDatasets(
1625 bias, collections=["imported_r", "imported_g"], findFirst=False
1626 )
1627 ),
1628 expectedAllBiases,
1629 )
1630 self.assertCountEqual(
1631 list(
1632 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1633 ),
1634 expectedDeduplicatedBiases,
1635 )
1636 # Materialize the dataset queries, too.
1637 with subsetDataIds.findDatasets(
1638 bias, collections=["imported_r", "imported_g"], findFirst=False
1639 ).materialize() as biases:
1640 self.assertCountEqual(list(biases), expectedAllBiases)
1641 with subsetDataIds.findDatasets(
1642 bias, collections=["imported_r", "imported_g"], findFirst=True
1643 ).materialize() as biases:
1644 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1645 # Materialize the original query, but none of the follow-up queries.
1646 with dataIds.materialize() as dataIds:
1647 self.assertEqual(dataIds.dimensions, expected_dimensions)
1648 self.assertEqual(dataIds.toSet(), expectedDataIds)
1649 self.assertCountEqual(
1650 list(
1651 dataIds.findDatasets(
1652 flat,
1653 collections=["imported_r"],
1654 )
1655 ),
1656 expectedFlats,
1657 )
1658 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1659 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1660 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1661 self.assertCountEqual(
1662 list(
1663 subsetDataIds.findDatasets(
1664 bias, collections=["imported_r", "imported_g"], findFirst=False
1665 )
1666 ),
1667 expectedAllBiases,
1668 )
1669 self.assertCountEqual(
1670 list(
1671 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1672 ),
1673 expectedDeduplicatedBiases,
1674 )
1675 # Materialize just the bias dataset queries.
1676 with subsetDataIds.findDatasets(
1677 bias, collections=["imported_r", "imported_g"], findFirst=False
1678 ).materialize() as biases:
1679 self.assertCountEqual(list(biases), expectedAllBiases)
1680 with subsetDataIds.findDatasets(
1681 bias, collections=["imported_r", "imported_g"], findFirst=True
1682 ).materialize() as biases:
1683 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1684 # Materialize the subset data ID query, but not the dataset
1685 # queries.
1686 with subsetDataIds.materialize() as subsetDataIds:
1687 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1688 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1689 self.assertCountEqual(
1690 list(
1691 subsetDataIds.findDatasets(
1692 bias, collections=["imported_r", "imported_g"], findFirst=False
1693 )
1694 ),
1695 expectedAllBiases,
1696 )
1697 self.assertCountEqual(
1698 list(
1699 subsetDataIds.findDatasets(
1700 bias, collections=["imported_r", "imported_g"], findFirst=True
1701 )
1702 ),
1703 expectedDeduplicatedBiases,
1704 )
1705 # Materialize the bias dataset queries, too, so now we're
1706 # materializing every single step.
1707 with subsetDataIds.findDatasets(
1708 bias, collections=["imported_r", "imported_g"], findFirst=False
1709 ).materialize() as biases:
1710 self.assertCountEqual(list(biases), expectedAllBiases)
1711 with subsetDataIds.findDatasets(
1712 bias, collections=["imported_r", "imported_g"], findFirst=True
1713 ).materialize() as biases:
1714 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1716 def testStorageClassPropagation(self):
1717 """Test that queries for datasets respect the storage class passed in
1718 as part of a full dataset type.
1719 """
1720 registry = self.makeRegistry()
1721 self.loadData(registry, "base.yaml")
1722 dataset_type_in_registry = DatasetType(
1723 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1724 )
1725 registry.registerDatasetType(dataset_type_in_registry)
1726 run = "run1"
1727 registry.registerRun(run)
1728 (inserted_ref,) = registry.insertDatasets(
1729 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1730 )
1731 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1732 query_dataset_type = DatasetType(
1733 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1734 )
1735 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1736 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1737 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1738 (query_datasets_ref,) = query_datasets_result
1739 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1740 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1741 query_dataset_type, collections=[run]
1742 )
1743 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1744 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1745 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1746 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1747 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1748 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1749 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1751 def testEmptyDimensionsQueries(self):
1752 """Test Query and QueryResults objects in the case where there are no
1753 dimensions.
1754 """
1755 # Set up test data: one dataset type, two runs, one dataset in each.
1756 registry = self.makeRegistry()
1757 self.loadData(registry, "base.yaml")
1758 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1759 registry.registerDatasetType(schema)
1760 dataId = DataCoordinate.make_empty(registry.dimensions)
1761 run1 = "run1"
1762 run2 = "run2"
1763 registry.registerRun(run1)
1764 registry.registerRun(run2)
1765 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1766 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1767 # Query directly for both of the datasets, and then for each one individually.
1768 self.checkQueryResults(
1769 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1770 )
1771 self.checkQueryResults(
1772 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1773 [dataset1],
1774 )
1775 self.checkQueryResults(
1776 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1777 [dataset2],
1778 )
1779 # Query for data IDs with no dimensions.
1780 dataIds = registry.queryDataIds([])
1781 self.checkQueryResults(dataIds, [dataId])
1782 # Use queried data IDs to find the datasets.
1783 self.checkQueryResults(
1784 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1785 [dataset1, dataset2],
1786 )
1787 self.checkQueryResults(
1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1789 [dataset1],
1790 )
1791 self.checkQueryResults(
1792 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1793 [dataset2],
1794 )
1795 # Now materialize the data ID query results and repeat those tests.
1796 with dataIds.materialize() as dataIds:
1797 self.checkQueryResults(dataIds, [dataId])
1798 self.checkQueryResults(
1799 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1800 [dataset1],
1801 )
1802 self.checkQueryResults(
1803 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1804 [dataset2],
1805 )
1806 # Query for non-empty data IDs, then subset that to get the empty one.
1807 # Repeat the above tests starting from that.
1808 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1809 self.checkQueryResults(dataIds, [dataId])
1810 self.checkQueryResults(
1811 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1812 [dataset1, dataset2],
1813 )
1814 self.checkQueryResults(
1815 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1816 [dataset1],
1817 )
1818 self.checkQueryResults(
1819 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1820 [dataset2],
1821 )
1822 with dataIds.materialize() as dataIds:
1823 self.checkQueryResults(dataIds, [dataId])
1824 self.checkQueryResults(
1825 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1826 [dataset1, dataset2],
1827 )
1828 self.checkQueryResults(
1829 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1830 [dataset1],
1831 )
1832 self.checkQueryResults(
1833 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1834 [dataset2],
1835 )
1836 # Query for non-empty data IDs, then materialize, then subset to get
1837 # the empty one. Repeat again.
1838 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1839 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1840 self.checkQueryResults(dataIds, [dataId])
1841 self.checkQueryResults(
1842 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1843 [dataset1, dataset2],
1844 )
1845 self.checkQueryResults(
1846 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1847 [dataset1],
1848 )
1849 self.checkQueryResults(
1850 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1851 [dataset2],
1852 )
1853 with dataIds.materialize() as dataIds:
1854 self.checkQueryResults(dataIds, [dataId])
1855 self.checkQueryResults(
1856 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1857 [dataset1, dataset2],
1858 )
1859 self.checkQueryResults(
1860 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1861 [dataset1],
1862 )
1863 self.checkQueryResults(
1864 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1865 [dataset2],
1866 )
1867 # Repeat the materialization tests with a dimension element that isn't
1868 # cached, so there's no way we can know when building the query whether
1869 # there are any rows or not (there aren't).
1870 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1871 with dataIds.materialize() as dataIds:
1872 self.checkQueryResults(dataIds, [])
1873 self.checkQueryResults(
1874 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1875 )
1876 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1877 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1878 # Query for non-empty data IDs with a constraint on an empty-data-ID
1879 # dataset that exists.
1880 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1881 self.checkQueryResults(
1882 dataIds.subset(unique=True),
1883 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1884 )
1885 # Again query for non-empty data IDs with a constraint on empty-data-ID
1886 # datasets, but when the datasets don't exist. We delete the existing
1887 # dataset and query just that collection rather than creating a new
1888 # empty collection because this is a bit less likely for our build-time
1889 # logic to shortcut out (via the collection summaries), and such a
1890 # shortcut would make this test a bit more trivial than we'd like.
1891 registry.removeDatasets([dataset2])
1892 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1893 self.checkQueryResults(dataIds, [])
1895 def testDimensionDataModifications(self):
1896 """Test that modifying dimension records via:
1897 syncDimensionData(..., update=True) and
1898 insertDimensionData(..., replace=True) works as expected, even in the
1899 presence of datasets using those dimensions and spatial overlap
1900 relationships.
1901 """
1903 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1904 """Unpack a sphgeom.RangeSet into the integers it contains."""
1905 for begin, end in ranges:
1906 yield from range(begin, end)
1908 def _range_set_hull(
1909 ranges: lsst.sphgeom.RangeSet,
1910 pixelization: lsst.sphgeom.HtmPixelization,
1911 ) -> lsst.sphgeom.ConvexPolygon:
1912 """Create a ConvexPolygon hull of the region defined by a set of
1913 HTM pixelization index ranges.
1914 """
1915 points = []
1916 for index in _unpack_range_set(ranges):
1917 points.extend(pixelization.triangle(index).getVertices())
1918 return lsst.sphgeom.ConvexPolygon(points)
1920 # Use HTM to set up an initial parent region (one arbitrary trixel)
1921 # and four child regions (the trixels within the parent at the next
1922 # level). We'll use the parent as a tract/visit region and the children
1923 # as its patch/visit_detector regions.
1924 registry = self.makeRegistry()
1925 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1926 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1927 index = 12288
1928 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1929 assert htm6.universe().contains(child_ranges_small)
1930 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1931 parent_region_small = lsst.sphgeom.ConvexPolygon(
1932 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1933 )
1934 assert all(parent_region_small.contains(c) for c in child_regions_small)
1935 # Make a larger version of each child region, defined to be the set of
1936 # htm6 trixels that overlap the original's bounding circle. Make a new
1937 # parent that's the convex hull of the new children.
1938 child_regions_large = [
1939 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1940 ]
1941 assert all(
1942 large.contains(small)
1943 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1944 )
1945 parent_region_large = lsst.sphgeom.ConvexPolygon(
1946 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1947 )
1948 assert all(parent_region_large.contains(c) for c in child_regions_large)
1949 assert parent_region_large.contains(parent_region_small)
1950 assert not parent_region_small.contains(parent_region_large)
1951 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1952 # Find some commonSkyPix indices that overlap the large regions but do
1953 # not overlap the small regions. We use commonSkyPix here to make sure the
1954 # real tests later involve what's in the database, not just post-query
1955 # filtering of regions.
1956 child_difference_indices = []
1957 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1958 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1959 assert difference, "if this is empty, we can't test anything useful with these regions"
1960 assert all(
1961 not commonSkyPix.triangle(d).isDisjointFrom(large)
1962 and commonSkyPix.triangle(d).isDisjointFrom(small)
1963 for d in difference
1964 )
1965 child_difference_indices.append(difference)
1966 parent_difference_indices = list(
1967 _unpack_range_set(
1968 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1969 )
1970 )
1971 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1972 assert all(
1973 (
1974 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1975 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1976 )
1977 for d in parent_difference_indices
1978 )
1979 # Now that we've finally got those regions, we'll insert the large ones
1980 # as tract/patch dimension records.
1981 skymap_name = "testing_v1"
1982 registry.insertDimensionData(
1983 "skymap",
1984 {
1985 "name": skymap_name,
1986 "hash": bytes([42]),
1987 "tract_max": 1,
1988 "patch_nx_max": 2,
1989 "patch_ny_max": 2,
1990 },
1991 )
1992 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1993 registry.insertDimensionData(
1994 "patch",
1995 *[
1996 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1997 for n, c in enumerate(child_regions_large)
1998 ],
1999 )
2000 # Add a dataset that uses these dimensions to make sure that modifying
2001 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't
2002 # implement insert with replace=True as delete-then-insert).
2003 dataset_type = DatasetType(
2004 "coadd",
2005 dimensions=["tract", "patch"],
2006 universe=registry.dimensions,
2007 storageClass="Exposure",
2008 )
2009 registry.registerDatasetType(dataset_type)
2010 registry.registerCollection("the_run", CollectionType.RUN)
2011 registry.insertDatasets(
2012 dataset_type,
2013 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
2014 run="the_run",
2015 )
2016 # Query for tracts and patches that overlap some "difference" commonSkyPix
2017 # pixels; there should be overlaps, because the database has
2018 # the "large" suite of regions.
2019 self.assertEqual(
2020 {0},
2021 {
2022 data_id["tract"]
2023 for data_id in registry.queryDataIds(
2024 ["tract"],
2025 skymap=skymap_name,
2026 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2027 )
2028 },
2029 )
2030 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2031 self.assertIn(
2032 patch_id,
2033 {
2034 data_id["patch"]
2035 for data_id in registry.queryDataIds(
2036 ["patch"],
2037 skymap=skymap_name,
2038 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2039 )
2040 },
2041 )
2042 # Use sync to update the tract region and insert with replace=True to
2043 # update the regions of the patches, switching to the "small" suite.
2044 updated = registry.syncDimensionData(
2045 "tract",
2046 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
2047 update=True,
2048 )
2049 self.assertEqual(updated, {"region": parent_region_large})
2050 registry.insertDimensionData(
2051 "patch",
2052 *[
2053 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2054 for n, c in enumerate(child_regions_small)
2055 ],
2056 replace=True,
2057 )
2058 # Query again; there should now be no such overlaps, because the
2059 # database has the "small" suite of regions.
2060 self.assertFalse(
2061 set(
2062 registry.queryDataIds(
2063 ["tract"],
2064 skymap=skymap_name,
2065 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2066 )
2067 )
2068 )
2069 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2070 self.assertNotIn(
2071 patch_id,
2072 {
2073 data_id["patch"]
2074 for data_id in registry.queryDataIds(
2075 ["patch"],
2076 skymap=skymap_name,
2077 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2078 )
2079 },
2080 )
2081 # Update back to the large regions and query one more time.
2082 updated = registry.syncDimensionData(
2083 "tract",
2084 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2085 update=True,
2086 )
2087 self.assertEqual(updated, {"region": parent_region_small})
2088 registry.insertDimensionData(
2089 "patch",
2090 *[
2091 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2092 for n, c in enumerate(child_regions_large)
2093 ],
2094 replace=True,
2095 )
2096 self.assertEqual(
2097 {0},
2098 {
2099 data_id["tract"]
2100 for data_id in registry.queryDataIds(
2101 ["tract"],
2102 skymap=skymap_name,
2103 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2104 )
2105 },
2106 )
2107 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2108 self.assertIn(
2109 patch_id,
2110 {
2111 data_id["patch"]
2112 for data_id in registry.queryDataIds(
2113 ["patch"],
2114 skymap=skymap_name,
2115 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2116 )
2117 },
2118 )
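# Hedged pure-Python model (not the registry implementation) of the
# syncDimensionData(..., update=True) contract relied on above: when a
# matching record exists and differs, the stored record is updated and the
# *previous* values of the changed fields are returned.
@staticmethod
def _sketchSyncUpdate(stored: dict, incoming: dict) -> dict:
    changed = {key: stored.get(key) for key, value in incoming.items() if stored.get(key) != value}
    stored.update(incoming)
    return changed
# E.g. syncing the "small" parent region over the "large" one returns
# {"region": parent_region_large}, exactly as asserted above.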
2120 def testCalibrationCollections(self):
2121 """Test operations on `~CollectionType.CALIBRATION` collections,
2122 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2123 `SqlRegistry.findDataset`, and
2124 `DataCoordinateQueryResults.findRelatedDatasets`.
2125 """
2126 # Setup - make a Registry, fill it with some datasets in
2127 # non-calibration collections.
2128 registry = self.makeRegistry()
2129 self.loadData(registry, "base.yaml")
2130 self.loadData(registry, "datasets.yaml")
2131 # Set up some timestamps.
2132 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2133 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2134 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2135 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2136 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2137 allTimespans = [
2138 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2139 ]
2140 # Insert some exposure records with timespans between each sequential
2141 # pair of those.
2142 registry.insertDimensionData(
2143 "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
2144 )
2145 registry.insertDimensionData(
2146 "group",
2147 {"instrument": "Cam1", "name": "group0"},
2148 {"instrument": "Cam1", "name": "group1"},
2149 {"instrument": "Cam1", "name": "group2"},
2150 {"instrument": "Cam1", "name": "group3"},
2151 )
2152 registry.insertDimensionData(
2153 "exposure",
2154 {
2155 "instrument": "Cam1",
2156 "id": 0,
2157 "group": "group0",
2158 "obs_id": "zero",
2159 "physical_filter": "Cam1-G",
2160 "day_obs": 20200101,
2161 "timespan": Timespan(t1, t2),
2162 },
2163 {
2164 "instrument": "Cam1",
2165 "id": 1,
2166 "group": "group1",
2167 "obs_id": "one",
2168 "physical_filter": "Cam1-G",
2169 "day_obs": 20200101,
2170 "timespan": Timespan(t2, t3),
2171 },
2172 {
2173 "instrument": "Cam1",
2174 "id": 2,
2175 "group": "group2",
2176 "obs_id": "two",
2177 "physical_filter": "Cam1-G",
2178 "day_obs": 20200101,
2179 "timespan": Timespan(t3, t4),
2180 },
2181 {
2182 "instrument": "Cam1",
2183 "id": 3,
2184 "group": "group3",
2185 "obs_id": "three",
2186 "physical_filter": "Cam1-G",
2187 "day_obs": 20200101,
2188 "timespan": Timespan(t4, t5),
2189 },
2190 )
2191 # Get references to some datasets.
2192 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2193 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2194 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2195 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2196 # Register the main calibration collection we'll be working with.
2197 collection = "Cam1/calibs/default"
2198 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2199 # Cannot associate into a calibration collection (no timespan).
2200 with self.assertRaises(CollectionTypeError):
2201 registry.associate(collection, [bias2a])
2202 # Certify 2a dataset with [t2, t4) validity.
2203 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2204 # Test that we can query for this dataset via the new collection, both
2205 # on its own and with a RUN collection.
2206 self.assertEqual(
2207 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2208 {bias2a},
2209 )
2210 self.assertEqual(
2211 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2212 {
2213 bias2a,
2214 bias2b,
2215 bias3b,
2216 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2217 },
2218 )
2219 self.assertEqual(
2220 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2221 {registry.expandDataId(instrument="Cam1", detector=2)},
2222 )
2223 self.assertEqual(
2224 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2225 {
2226 registry.expandDataId(instrument="Cam1", detector=2),
2227 registry.expandDataId(instrument="Cam1", detector=3),
2228 registry.expandDataId(instrument="Cam1", detector=4),
2229 },
2230 )
2231 self.assertEqual(
2232 set(
2233 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2234 "bias", findFirst=True, collections=[collection]
2235 )
2236 ),
2237 {
2238 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2239 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2240 },
2241 )
2242 self.assertEqual(
2243 set(
2244 registry.queryDataIds(
2245 ["exposure", "detector"], instrument="Cam1", detector=2
2246 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2247 ),
2248 {
2249 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2250 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2251 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2252 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2253 },
2254 )
2256 # We should not be able to certify 2b with anything overlapping that
2257 # window.
2258 with self.assertRaises(ConflictingDefinitionError):
2259 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2260 with self.assertRaises(ConflictingDefinitionError):
2261 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2262 with self.assertRaises(ConflictingDefinitionError):
2263 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2264 with self.assertRaises(ConflictingDefinitionError):
2265 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2266 with self.assertRaises(ConflictingDefinitionError):
2267 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2268 with self.assertRaises(ConflictingDefinitionError):
2269 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2270 with self.assertRaises(ConflictingDefinitionError):
2271 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2272 with self.assertRaises(ConflictingDefinitionError):
2273 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2274 # We should be able to certify 3a with a range overlapping that window,
2275 # because it's for a different detector.
2276 # We'll certify 3a over [t1, t3).
2277 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2278 # Now we'll certify 2b and 3b together over [t4, ∞).
2279 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2281 # Fetch all associations and check that they are what we expect.
2282 self.assertCountEqual(
2283 list(
2284 registry.queryDatasetAssociations(
2285 "bias",
2286 collections=[collection, "imported_g", "imported_r"],
2287 )
2288 ),
2289 [
2290 DatasetAssociation(
2291 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2292 collection="imported_g",
2293 timespan=None,
2294 ),
2295 DatasetAssociation(
2296 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2297 collection="imported_r",
2298 timespan=None,
2299 ),
2300 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2301 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2302 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2303 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2304 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2305 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2306 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2307 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2308 ],
2309 )
2311 class Ambiguous:
2312 """Tag class to denote lookups that should be ambiguous."""
2314 pass
2316 def _assertLookup(
2317 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2318 ) -> None:
2319 """Local function that asserts that a bias lookup returns the given
2320 expected result.
2321 """
2322 if expected is Ambiguous:
2323 with self.assertRaises((DatasetTypeError, LookupError)):
2324 registry.findDataset(
2325 "bias",
2326 collections=collection,
2327 instrument="Cam1",
2328 detector=detector,
2329 timespan=timespan,
2330 )
2331 else:
2332 self.assertEqual(
2333 expected,
2334 registry.findDataset(
2335 "bias",
2336 collections=collection,
2337 instrument="Cam1",
2338 detector=detector,
2339 timespan=timespan,
2340 ),
2341 )
2343 # Systematically test lookups against expected results.
2344 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2345 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2346 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2347 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2348 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2349 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2350 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2351 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2352 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2353 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2354 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2355 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2356 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2357 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2358 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2359 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2360 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2361 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2362 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2363 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2364 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2365 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2366 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2367 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2368 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2369 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2370 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2371 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2372 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2373 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2374 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2375 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2376 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2377 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2378 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2379 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2380 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2381 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2382 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2383 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2384 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2385 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2387 # Test lookups via temporal joins to exposures.
2388 self.assertEqual(
2389 set(
2390 registry.queryDataIds(
2391 ["exposure", "detector"], instrument="Cam1", detector=2
2392 ).findRelatedDatasets("bias", collections=[collection])
2393 ),
2394 {
2395 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2396 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2397 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2398 },
2399 )
2400 self.assertEqual(
2401 set(
2402 registry.queryDataIds(
2403 ["exposure", "detector"], instrument="Cam1", detector=3
2404 ).findRelatedDatasets("bias", collections=[collection])
2405 ),
2406 {
2407 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2408 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2409 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2410 },
2411 )
2412 self.assertEqual(
2413 set(
2414 registry.queryDataIds(
2415 ["exposure", "detector"], instrument="Cam1", detector=2
2416 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2417 ),
2418 {
2419 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2420 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2421 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2422 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2423 },
2424 )
2425 self.assertEqual(
2426 set(
2427 registry.queryDataIds(
2428 ["exposure", "detector"], instrument="Cam1", detector=3
2429 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2430 ),
2431 {
2432 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2433 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2434 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2435 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2436 },
2437 )
2439 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2440 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2441 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2442 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2443 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2444 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2445 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2446 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2447 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2448 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2449 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2450 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2451 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2452 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2453 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2454 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2455 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2456 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2457 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2458 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2459 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2460 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2461 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2462 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2463 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2464 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2465 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2466 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2467 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2468 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2469 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2470 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2471 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2472 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2473 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2474 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2475 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2476 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2477 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2478 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2479 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2480 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2481 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2482 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2483 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2484 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2486 # Decertify everything, this time with explicit data IDs, then check
2487 # that no lookups succeed.
2488 registry.decertify(
2489 collection,
2490 "bias",
2491 Timespan(None, None),
2492 dataIds=[
2493 dict(instrument="Cam1", detector=2),
2494 dict(instrument="Cam1", detector=3),
2495 ],
2496 )
2497 for detector in (2, 3):
2498 for timespan in allTimespans:
2499 _assertLookup(detector=detector, timespan=timespan, expected=None)
2500 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2501 # those.
2502 registry.certify(
2503 collection,
2504 [bias2a, bias3a],
2505 Timespan(None, None),
2506 )
2507 for timespan in allTimespans:
2508 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2509 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2510 # Decertify just bias2 over [t2, t4).
2511 # This should split a single certification row into two (and leave the
2512 # other existing row, for bias3a, alone).
2513 registry.decertify(
2514 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2515 )
2516 for timespan in allTimespans:
2517 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2518 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2519 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2520 if overlapsBefore and overlapsAfter:
2521 expected = Ambiguous
2522 elif overlapsBefore or overlapsAfter:
2523 expected = bias2a
2524 else:
2525 expected = None
2526 _assertLookup(detector=2, timespan=timespan, expected=expected)
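# Hedged pure-Python model (finite bounds only; not the registry
# implementation) of how decertify truncates or splits a certification's
# validity range: removing window `w` from certification `ts` leaves at
# most two pieces, one on each side of the window.
@staticmethod
def _sketchDecertify(ts: Timespan, w: Timespan) -> list[Timespan]:
    pieces: list[Timespan] = []
    if w.begin > ts.begin:
        # Keep the part of the certification before the window.
        pieces.append(Timespan(ts.begin, min(w.begin, ts.end)))
    if w.end < ts.end:
        # Keep the part of the certification after the window.
        pieces.append(Timespan(max(w.end, ts.begin), ts.end))
    return pieces
# E.g. removing Timespan(t3, t5) from bias2a's Timespan(t2, t4) leaves just
# Timespan(t2, t3), matching the truncation asserted after the first
# decertify above; decertifying a window strictly inside a certification is
# the split-into-two case (modulo the finite-bounds simplification here).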
2528 def testSkipCalibs(self):
2529 """Test how queries handle skipping of calibration collections."""
2530 registry = self.makeRegistry()
2531 self.loadData(registry, "base.yaml")
2532 self.loadData(registry, "datasets.yaml")
2534 coll_calib = "Cam1/calibs/default"
2535 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2537 # Add all biases to the calibration collection.
2538 # Without this, the logic that prunes dataset subqueries based on
2539 # datasetType-collection summary information will fire before the logic
2540 # we want to test below. This is a good thing (it avoids the dreaded
2541 # NotImplementedError a bit more often) everywhere but here.
2542 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2544 coll_list = [coll_calib, "imported_g", "imported_r"]
2545 chain = "Cam1/chain"
2546 registry.registerCollection(chain, type=CollectionType.CHAINED)
2547 registry.setCollectionChain(chain, coll_list)
2549 # explicit list will raise if findFirst=True or there are temporal
2550 # dimensions
2551 with self.assertRaises(NotImplementedError):
2552 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2553 with self.assertRaises(NotImplementedError):
2554 registry.queryDataIds(
2555 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2556 ).count()
2558 # chain will skip
2559 datasets = list(registry.queryDatasets("bias", collections=chain))
2560 self.assertGreater(len(datasets), 0)
2562 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2563 self.assertGreater(len(dataIds), 0)
2565 # glob will skip too
2566 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2567 self.assertGreater(len(datasets), 0)
2569 # regular expression will skip too
2570 pattern = re.compile(".*")
2571 datasets = list(registry.queryDatasets("bias", collections=pattern))
2572 self.assertGreater(len(datasets), 0)
2574 # ellipsis should work as usual
2575 datasets = list(registry.queryDatasets("bias", collections=...))
2576 self.assertGreater(len(datasets), 0)
2578 # a few tests with findFirst
2579 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2580 self.assertGreater(len(datasets), 0)
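# Hedged pure-Python model (a deliberate simplification of the query
# system's behavior shown above): calibration collections reached through
# a chain or pattern are silently skipped, while naming one in an explicit
# list raises for the findFirst/temporal queries tested above.
@staticmethod
def _sketchSkipCalibs(
    collections: list[str], types: dict[str, CollectionType], explicit: bool
) -> list[str]:
    searchable = []
    for name in collections:
        if types[name] is CollectionType.CALIBRATION:
            if explicit:
                raise NotImplementedError(f"Cannot search calibration collection {name!r}.")
            continue  # skipped silently when expanded from a chain or pattern
        searchable.append(name)
    return searchable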
2582 def testIngestTimeQuery(self):
"""Test query expressions that constrain on dataset ingest_date."""
2583 registry = self.makeRegistry()
2584 self.loadData(registry, "base.yaml")
2585 dt0 = datetime.datetime.now(datetime.UTC)
2586 self.loadData(registry, "datasets.yaml")
2587 dt1 = datetime.datetime.now(datetime.UTC)
2589 datasets = list(registry.queryDatasets(..., collections=...))
2590 len0 = len(datasets)
2591 self.assertGreater(len0, 0)
2593 where = "ingest_date > T'2000-01-01'"
2594 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2595 len1 = len(datasets)
2596 self.assertEqual(len0, len1)
2598 # no one will ever use this piece of software in 30 years
2599 where = "ingest_date > T'2050-01-01'"
2600 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2601 len2 = len(datasets)
2602 self.assertEqual(len2, 0)
2604 # Check more exact timing to make sure there is no 37-second offset
2605 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2606 # sure that we don't test with higher precision.
2607 tests = [
2608 # format: (timestamp, operator, expected_len)
2609 (dt0 - timedelta(seconds=1), ">", len0),
2610 (dt0 - timedelta(seconds=1), "<", 0),
2611 (dt1 + timedelta(seconds=1), "<", len0),
2612 (dt1 + timedelta(seconds=1), ">", 0),
2613 ]
2614 for dt, op, expect_len in tests:
2615 dt_str = dt.isoformat(sep=" ")
2617 where = f"ingest_date {op} T'{dt_str}'"
2618 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2619 self.assertEqual(len(datasets), expect_len)
2621 # same with bind using datetime or astropy Time
2622 where = f"ingest_date {op} ingest_time"
2623 datasets = list(
2624 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2625 )
2626 self.assertEqual(len(datasets), expect_len)
2628 dt_astropy = astropy.time.Time(dt, format="datetime")
2629 datasets = list(
2630 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2631 )
2632 self.assertEqual(len(datasets), expect_len)
2634 def testTimespanQueries(self):
2635 """Test query expressions involving timespans."""
2636 registry = self.makeRegistry()
2637 self.loadData(registry, "hsc-rc2-subset.yaml")
2638 # All visits in the database; mapping from ID to timespan.
2639 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2640 # Just those IDs, sorted (which is also temporal sorting, because HSC
2641 # visit IDs are monotonically increasing).
2642 ids = sorted(visits.keys())
2643 self.assertGreater(len(ids), 20)
2644 # Pick some quasi-random indexes into `ids` to play with.
2645 i1 = int(len(ids) * 0.1)
2646 i2 = int(len(ids) * 0.3)
2647 i3 = int(len(ids) * 0.6)
2648 i4 = int(len(ids) * 0.8)
2649 # Extract some times from those: just before the beginning of i1 (which
2650 # should be after the end of the previous visit), exactly the
2651 # beginning of i2, just after the beginning of i3 (and before its end),
2652 # and the exact end of i4.
2653 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2654 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2655 t2 = visits[ids[i2]].begin
2656 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2657 self.assertLess(t3, visits[ids[i3]].end)
2658 t4 = visits[ids[i4]].end
2659 # Make sure those are actually in order.
2660 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2662 bind = {
2663 "t1": t1,
2664 "t2": t2,
2665 "t3": t3,
2666 "t4": t4,
2667 "ts23": Timespan(t2, t3),
2668 }
2670 def query(where):
2671 """Return results as a sorted, deduplicated list of visit IDs.
2673 Parameters
2674 ----------
2675 where : `str`
2676 The WHERE clause for the query.
2677 """
2678 return sorted(
2679 {
2680 dataId["visit"]
2681 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2682 }
2683 )
2685 # Try a bunch of timespan queries, mixing up the bounds themselves,
2686 # where they appear in the expression, and how we get the timespan into
2687 # the expression.
2689 # t1 is before the start of i1, so this should not include i1.
2690 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2691 # t2 is exactly at the start of i2, but ends are exclusive, so these
2692 # should not include i2.
2693 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2694 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2695 # t3 is in the middle of i3, so this should include i3.
2696 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2697 # This one should not include i3 by the same reasoning.
2698 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2699 # t4 is exactly at the end of i4, so this should include i4.
2700 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2701 # i4's upper bound of t4 is exclusive, so this should not include i4.
2702 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2704 # Now some timespan vs. time scalar queries.
2705 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2706 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2707 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2708 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2709 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2710 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2712 # Empty timespans should not overlap anything.
2713 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
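# Illustrative sketch (not part of the original suite): the half-open
# [begin, end) convention the assertions above rely on, shown with the
# public Timespan API (import path, overlaps(), and makeEmpty() assumed).
import astropy.time
from lsst.daf.butler import Timespan

def _timespan_semantics_sketch():
    t_a = astropy.time.Time("2020-01-01T00:00:00", format="isot", scale="tai")
    t_b = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
    t_c = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
    # Adjacent timespans share only the (exclusive) end bound: no overlap.
    assert not Timespan(t_a, t_b).overlaps(Timespan(t_b, t_c))
    # A genuinely intersecting pair does overlap.
    assert Timespan(t_a, t_c).overlaps(Timespan(t_b, t_c))
    # An empty timespan overlaps nothing, matching the (t3, t2) query above.
    assert not Timespan.makeEmpty().overlaps(Timespan(t_a, t_c))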
2715 def testCollectionSummaries(self):
2716 """Test recording and retrieval of collection summaries."""
2717 self.maxDiff = None
2718 registry = self.makeRegistry()
2719 # Importing datasets from yaml should go through the code path where
2720 # we update collection summaries as we insert datasets.
2721 self.loadData(registry, "base.yaml")
2722 self.loadData(registry, "datasets.yaml")
2723 flat = registry.getDatasetType("flat")
2724 expected1 = CollectionSummary()
2725 expected1.dataset_types.add(registry.getDatasetType("bias"))
2726 expected1.add_data_ids(
2727 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2728 )
2729 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2730 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2731 # Create a chained collection with both of the imported runs; the
2732 # summary should be the same, because it's a union with itself.
2733 chain = "chain"
2734 registry.registerCollection(chain, CollectionType.CHAINED)
2735 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2736 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2737 # Associate flats only into a tagged collection and a calibration
2738 # collection to check summaries of those.
2739 tag = "tag"
2740 registry.registerCollection(tag, CollectionType.TAGGED)
2741 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2742 calibs = "calibs"
2743 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2744 registry.certify(
2745 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2746 )
2747 expected2 = expected1.copy()
2748 expected2.dataset_types.discard("bias")
2749 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2750 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2751 # Explicitly calling SqlRegistry.refresh() should load those same
2752 # summaries, via a totally different code path.
2753 registry.refresh()
2754 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2755 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2756 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2757 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2759 def testBindInQueryDatasets(self):
2760 """Test that the bind parameter is correctly forwarded in
2761 queryDatasets recursion.
2762 """
2763 registry = self.makeRegistry()
2764 # Load some datasets to query against.
2766 self.loadData(registry, "base.yaml")
2767 self.loadData(registry, "datasets.yaml")
2768 self.assertEqual(
2769 set(registry.queryDatasets("flat", band="r", collections=...)),
2770 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2771 )
2773 def testQueryIntRangeExpressions(self):
2774 """Test integer range expressions in ``where`` arguments.
2776 Note that our expressions use inclusive stop values, unlike Python's.
2777 """
2778 registry = self.makeRegistry()
2779 self.loadData(registry, "base.yaml")
2780 self.assertEqual(
2781 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2782 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2783 )
2784 self.assertEqual(
2785 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2786 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2787 )
2788 self.assertEqual(
2789 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2790 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2791 )
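# Illustrative sketch (not part of the original suite): the START..STOP or
# START..STOP:STRIDE form maps onto Python's range() with an inclusive stop.
# The helper name is hypothetical, for illustration only.
def _expand_range_expression(start: int, stop: int, stride: int = 1) -> list[int]:
    """Expand a 'START..STOP:STRIDE' range to the values it matches."""
    return list(range(start, stop + 1, stride))

assert _expand_range_expression(1, 2) == [1, 2]  # detector IN (1..2)
assert _expand_range_expression(1, 4, 2) == [1, 3]  # detector IN (1..4:2)
assert _expand_range_expression(2, 4, 2) == [2, 4]  # detector IN (2..4:2)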
2793 def testQueryResultSummaries(self):
2794 """Test summary methods like `count`, `any`, and `explain_no_results`
2795 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2796 """
2797 registry = self.makeRegistry()
2798 self.loadData(registry, "base.yaml")
2799 self.loadData(registry, "datasets.yaml")
2800 self.loadData(registry, "spatial.yaml")
2801 # Default test dataset has two collections, each with both flats and
2802 # biases. Add a new collection with only biases.
2803 registry.registerCollection("biases", CollectionType.TAGGED)
2804 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2805 # First query yields two results, and involves no postprocessing.
2806 query1 = registry.queryDataIds(["physical_filter"], band="r")
2807 self.assertTrue(query1.any(execute=False, exact=False))
2808 self.assertTrue(query1.any(execute=True, exact=False))
2809 self.assertTrue(query1.any(execute=True, exact=True))
2810 self.assertEqual(query1.count(exact=False), 2)
2811 self.assertEqual(query1.count(exact=True), 2)
2812 self.assertFalse(list(query1.explain_no_results()))
2813 # Second query should yield no results, which we should see when
2814 # we attempt to expand the data ID.
2815 query2 = registry.queryDataIds(["physical_filter"], band="h")
2816 # There's no execute=False, exact=False test here because the behavior
2817 # is not something we want to guarantee in this case (and exact=False
2818 # says either answer is legal).
2819 self.assertFalse(query2.any(execute=True, exact=False))
2820 self.assertFalse(query2.any(execute=True, exact=True))
2821 self.assertEqual(query2.count(exact=False), 0)
2822 self.assertEqual(query2.count(exact=True), 0)
2823 self.assertTrue(list(query2.explain_no_results()))
2824 # These queries yield no results due to various problems that can be
2825 # spotted prior to execution, yielding helpful diagnostics.
2826 base_query = registry.queryDataIds(["detector", "physical_filter"])
2827 queries_and_snippets = [
2828 (
2829 # Dataset type name doesn't match any existing dataset types.
2830 registry.queryDatasets("nonexistent", collections=...),
2831 ["nonexistent"],
2832 ),
2833 (
2834 # Dataset type object isn't registered.
2835 registry.queryDatasets(
2836 DatasetType(
2837 "nonexistent",
2838 dimensions=["instrument"],
2839 universe=registry.dimensions,
2840 storageClass="Image",
2841 ),
2842 collections=...,
2843 ),
2844 ["nonexistent"],
2845 ),
2846 (
2847 # No datasets of this type in this collection.
2848 registry.queryDatasets("flat", collections=["biases"]),
2849 ["flat", "biases"],
2850 ),
2851 (
2852 # No datasets of this type in this collection.
2853 base_query.findDatasets("flat", collections=["biases"]),
2854 ["flat", "biases"],
2855 ),
2856 (
2857 # No collections matching at all.
2858 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2859 ["potato"],
2860 ),
2861 ]
2862 with self.assertRaises(MissingDatasetTypeError):
2863 # Dataset type name doesn't match any existing dataset types.
2864 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
2865 with self.assertRaises(MissingDatasetTypeError):
2866 # Dataset type name doesn't match any existing dataset types.
2867 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
2868 for query, snippets in queries_and_snippets:
2869 self.assertFalse(query.any(execute=False, exact=False))
2870 self.assertFalse(query.any(execute=True, exact=False))
2871 self.assertFalse(query.any(execute=True, exact=True))
2872 self.assertEqual(query.count(exact=False), 0)
2873 self.assertEqual(query.count(exact=True), 0)
2874 messages = list(query.explain_no_results())
2875 self.assertTrue(messages)
2876 # Want all expected snippets to appear in at least one message.
2877 self.assertTrue(
2878 any(
2879 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2880 ),
2881 messages,
2882 )
2884 # Wildcards on dataset types are not permitted in queryDataIds.
2885 with self.assertRaises(DatasetTypeExpressionError):
2886 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2888 # These queries yield no results due to problems that can be identified
2889 # by cheap follow-up queries, yielding helpful diagnostics.
2890 for query, snippets in [
2891 (
2892 # No records for one of the involved dimensions.
2893 registry.queryDataIds(["subfilter"]),
2894 ["no rows", "subfilter"],
2895 ),
2896 (
2897 # No records for one of the involved dimensions.
2898 registry.queryDimensionRecords("subfilter"),
2899 ["no rows", "subfilter"],
2900 ),
2901 ]:
2902 self.assertFalse(query.any(execute=True, exact=False))
2903 self.assertFalse(query.any(execute=True, exact=True))
2904 self.assertEqual(query.count(exact=True), 0)
2905 messages = list(query.explain_no_results())
2906 self.assertTrue(messages)
2907 # Want all expected snippets to appear in at least one message.
2908 self.assertTrue(
2909 any(
2910 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2911 ),
2912 messages,
2913 )
2915 # This query yields four overlaps in the database, but one is filtered
2916 # out in postprocessing. The count queries aren't accurate because
2917 # they don't account for duplication that happens due to an internal
2918 # join against commonSkyPix.
2919 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2920 self.assertEqual(
2921 {
2922 DataCoordinate.standardize(
2923 instrument="Cam1",
2924 skymap="SkyMap1",
2925 visit=v,
2926 tract=t,
2927 universe=registry.dimensions,
2928 )
2929 for v, t in [(1, 0), (2, 0), (2, 1)]
2930 },
2931 set(query3),
2932 )
2933 self.assertTrue(query3.any(execute=False, exact=False))
2934 self.assertTrue(query3.any(execute=True, exact=False))
2935 self.assertTrue(query3.any(execute=True, exact=True))
2936 self.assertGreaterEqual(query3.count(exact=False), 4)
2937 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2938 self.assertFalse(list(query3.explain_no_results()))
2939 # This query yields overlaps in the database, but all are filtered
2940 # out in postprocessing. The count queries again aren't very useful.
2941 # We have to use `where=` here to avoid an optimization that
2942 # (currently) skips the spatial postprocess-filtering because it
2943 # recognizes that no spatial join is necessary. That's not ideal, but
2944 # fixing it is out of scope for this ticket.
2945 query4 = registry.queryDataIds(
2946 ["visit", "tract"],
2947 instrument="Cam1",
2948 skymap="SkyMap1",
2949 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2950 )
2951 self.assertFalse(set(query4))
2952 self.assertTrue(query4.any(execute=False, exact=False))
2953 self.assertTrue(query4.any(execute=True, exact=False))
2954 self.assertFalse(query4.any(execute=True, exact=True))
2955 self.assertGreaterEqual(query4.count(exact=False), 1)
2956 self.assertEqual(query4.count(exact=True, discard=True), 0)
2957 messages = query4.explain_no_results()
2958 self.assertTrue(messages)
2959 self.assertTrue(any("overlap" in message for message in messages))
2960 # This query should yield results from one dataset type but not the
2961 # other, which is not registered.
2962 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2963 self.assertTrue(set(query5))
2964 self.assertTrue(query5.any(execute=False, exact=False))
2965 self.assertTrue(query5.any(execute=True, exact=False))
2966 self.assertTrue(query5.any(execute=True, exact=True))
2967 self.assertGreaterEqual(query5.count(exact=False), 1)
2968 self.assertGreaterEqual(query5.count(exact=True), 1)
2969 self.assertFalse(list(query5.explain_no_results()))
2970 # This query applies a selection that yields no results, fully in the
2971 # database. Explaining why it fails involves traversing the relation
2972 # tree and running a LIMIT 1 query at each level that has the potential
2973 # to remove rows.
2974 query6 = registry.queryDimensionRecords(
2975 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2976 )
2977 self.assertEqual(query6.count(exact=True), 0)
2978 messages = query6.explain_no_results()
2979 self.assertTrue(messages)
2980 self.assertTrue(any("no-purpose" in message for message in messages))
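# Illustrative sketch (not part of the original suite): the diagnostic
# pattern the assertions above exercise. any(exact=False) may answer from
# cheap checks alone, count(exact=True, discard=True) forces execution plus
# postprocessing, and explain_no_results() reports why a query is doomed.
def _diagnose(query) -> int:
    if not query.any(execute=True, exact=True):
        for message in query.explain_no_results():
            print(message)
        return 0
    return query.count(exact=True, discard=True)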
2982 def testQueryDataIdsExpressionError(self):
2983 """Test error checking of 'where' expressions in queryDataIds."""
2984 registry = self.makeRegistry()
2985 self.loadData(registry, "base.yaml")
2986 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2987 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2988 registry.queryDataIds(["detector"], where="foo.bar = 12")
2989 with self.assertRaisesRegex(
2990 LookupError, "Dimension element name cannot be inferred in this context."
2991 ):
2992 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2994 def testQueryDataIdsOrderBy(self):
2995 """Test order_by and limit on result returned by queryDataIds()."""
2996 registry = self.makeRegistry()
2997 self.loadData(registry, "base.yaml")
2998 self.loadData(registry, "datasets.yaml")
2999 self.loadData(registry, "spatial.yaml")
3001 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
3002 return registry.queryDataIds(
3003 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
3004 )
3006 Test = namedtuple(
3007 "testQueryDataIdsOrderByTest",
3008 ("order_by", "keys", "result", "limit", "datasets", "collections"),
3009 defaults=(None, None, None),
3010 )
3012 test_data = (
3013 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
3014 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
3015 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
3016 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
3017 Test(
3018 "tract.id,visit.id",
3019 "tract,visit",
3020 ((0, 1), (0, 1), (0, 2)),
3021 limit=(3,),
3022 ),
3023 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
3024 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
3025 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
3026 Test(
3027 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
3028 ),
3029 Test(
3030 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
3031 ),
3032 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
3033 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
3034 Test(
3035 "tract,-visit.timespan.begin,visit.timespan.end",
3036 "tract,visit",
3037 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
3038 ),
3039 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
3040 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
3041 Test(
3042 "tract,detector",
3043 "tract,detector",
3044 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
3045 datasets="flat",
3046 collections="imported_r",
3047 ),
3048 Test(
3049 "tract,detector.full_name",
3050 "tract,detector",
3051 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
3052 datasets="flat",
3053 collections="imported_r",
3054 ),
3055 Test(
3056 "tract,detector.raft,detector.name_in_raft",
3057 "tract,detector",
3058 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
3059 datasets="flat",
3060 collections="imported_r",
3061 ),
3062 )
3064 for test in test_data:
3065 order_by = test.order_by.split(",")
3066 keys = test.keys.split(",")
3067 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
3068 if test.limit is not None:
3069 query = query.limit(*test.limit)
3070 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
3071 self.assertEqual(dataIds, test.result)
3073 # Materializing a query with order_by applied is an error.
3074 query = do_query(keys).order_by(*order_by)
3075 if test.limit is not None:
3076 query = query.limit(*test.limit)
3077 with self.assertRaises(RelationalAlgebraError):
3078 with query.materialize():
3079 pass
3081 # Errors in order_by names.
3082 for order_by in ("", "-"):
3083 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3084 list(do_query().order_by(order_by))
3086 for order_by in ("undimension.name", "-undimension.name"):
3087 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
3088 list(do_query().order_by(order_by))
3090 for order_by in ("attract", "-attract"):
3091 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
3092 list(do_query().order_by(order_by))
3094 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
3095 list(do_query(("exposure", "visit")).order_by("exposure_time"))
3097 with self.assertRaisesRegex(
3098 ValueError,
3099 r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
3100 r"qualify timespan with specific dimension name\.",
3101 ):
3102 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3104 with self.assertRaisesRegex(
3105 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3106 ):
3107 list(do_query("tract").order_by("timespan.begin"))
3109 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3110 list(do_query("tract").order_by("tract.timespan.begin"))
3112 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3113 list(do_query("tract").order_by("tract.name"))
3115 with self.assertRaisesRegex(
3116 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3117 ):
3118 list(do_query("visit").order_by("timestamp.begin"))
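# Illustrative sketch (not part of the original suite): the order_by grammar
# exercised above. Keys are dimension or "element.field" names, a leading
# "-" reverses the sort, temporal elements accept "timespan.begin" /
# "timespan.end", and limit() takes a row count plus an optional offset.
def _latest_visits(registry, n=5):
    query = registry.queryDataIds(["visit"], instrument="Cam1")
    return list(query.order_by("-visit.timespan.begin", "visit.name").limit(n))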
3120 def testQueryDataIdsGovernorExceptions(self):
3121 """Test exceptions raised by queryDataIds() for incorrect governors."""
3122 registry = self.makeRegistry()
3123 self.loadData(registry, "base.yaml")
3124 self.loadData(registry, "datasets.yaml")
3125 self.loadData(registry, "spatial.yaml")
3127 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3128 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3130 Test = namedtuple(
3131 "testQueryDataIdExceptionsTest",
3132 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3133 defaults=(None, None, None, {}, None, 0),
3134 )
3136 test_data = (
3137 Test("tract,visit", count=6),
3138 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3139 Test(
3140 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3141 ),
3142 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3143 Test(
3144 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3145 ),
3146 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3147 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3148 Test(
3149 "tract,visit",
3150 where="instrument=cam AND skymap=map",
3151 bind={"cam": "Cam1", "map": "SkyMap1"},
3152 count=6,
3153 ),
3154 Test(
3155 "tract,visit",
3156 where="instrument=cam AND skymap=map",
3157 bind={"cam": "Cam", "map": "SkyMap"},
3158 exception=DataIdValueError,
3159 ),
3160 )
3162 for test in test_data:
3163 dimensions = test.dimensions.split(",")
3164 if test.exception:
3165 with self.assertRaises(test.exception):
3166 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3167 else:
3168 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3169 self.assertEqual(query.count(discard=True), test.count)
3171 # and materialize
3172 if test.exception:
3173 with self.assertRaises(test.exception):
3174 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3175 with query.materialize() as materialized:
3176 materialized.count(discard=True)
3177 else:
3178 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3179 with query.materialize() as materialized:
3180 self.assertEqual(materialized.count(discard=True), test.count)
3182 def testQueryDimensionRecordsOrderBy(self):
3183 """Test order_by and limit on result returned by
3184 queryDimensionRecords().
3185 """
3186 registry = self.makeRegistry()
3187 self.loadData(registry, "base.yaml")
3188 self.loadData(registry, "datasets.yaml")
3189 self.loadData(registry, "spatial.yaml")
3191 def do_query(element, datasets=None, collections=None):
3192 return registry.queryDimensionRecords(
3193 element, instrument="Cam1", datasets=datasets, collections=collections
3194 )
3196 query = do_query("detector")
3197 self.assertEqual(len(list(query)), 4)
3199 Test = namedtuple(
3200 "testQueryDataIdsOrderByTest",
3201 ("element", "order_by", "result", "limit", "datasets", "collections"),
3202 defaults=(None, None, None),
3203 )
3205 test_data = (
3206 Test("detector", "detector", (1, 2, 3, 4)),
3207 Test("detector", "-detector", (4, 3, 2, 1)),
3208 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3209 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3210 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3211 Test("visit", "visit", (1, 2)),
3212 Test("visit", "-visit.id", (2, 1)),
3213 Test("visit", "zenith_angle", (1, 2)),
3214 Test("visit", "-visit.name", (2, 1)),
3215 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3216 )
3218 for test in test_data:
3219 order_by = test.order_by.split(",")
3220 query = do_query(test.element).order_by(*order_by)
3221 if test.limit is not None:
3222 query = query.limit(*test.limit)
3223 dataIds = tuple(rec.id for rec in query)
3224 self.assertEqual(dataIds, test.result)
3226 # Errors in order_by names.
3227 for order_by in ("", "-"):
3228 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3229 list(do_query("detector").order_by(order_by))
3231 for order_by in ("undimension.name", "-undimension.name"):
3232 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3233 list(do_query("detector").order_by(order_by))
3235 for order_by in ("attract", "-attract"):
3236 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3237 list(do_query("detector").order_by(order_by))
3239 for order_by in ("timestamp.begin", "-timestamp.begin"):
3240 with self.assertRaisesRegex(
3241 ValueError,
3242 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3243 r"perhaps you meant 'timespan.begin'\?",
3244 ):
3245 list(do_query("visit").order_by(order_by))
3247 def testQueryDimensionRecordsExceptions(self):
3248 """Test exceptions raised by queryDimensionRecords()."""
3249 registry = self.makeRegistry()
3250 self.loadData(registry, "base.yaml")
3251 self.loadData(registry, "datasets.yaml")
3252 self.loadData(registry, "spatial.yaml")
3254 result = registry.queryDimensionRecords("detector")
3255 self.assertEqual(result.count(), 4)
3256 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3257 self.assertEqual(result.count(), 4)
3258 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3259 self.assertEqual(result.count(), 4)
3260 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3261 self.assertEqual(result.count(), 4)
3262 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3263 self.assertEqual(result.count(), 4)
3265 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3266 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3267 result.count()
3269 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3270 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3271 result.count()
3273 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3274 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3275 result.count()
3277 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3278 result = registry.queryDimensionRecords(
3279 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3280 )
3281 result.count()
3283 def testDatasetConstrainedDimensionRecordQueries(self):
3284 """Test that queryDimensionRecords works even when given a dataset
3285 constraint whose dimensions extend beyond the requested dimension
3286 element's.
3287 """
3288 registry = self.makeRegistry()
3289 self.loadData(registry, "base.yaml")
3290 self.loadData(registry, "datasets.yaml")
3291 # Query for physical_filter dimension records, using a dataset type
3292 # whose dimensions extend beyond physical_filter's.
3293 records = registry.queryDimensionRecords(
3294 "physical_filter",
3295 datasets=["flat"],
3296 collections="imported_r",
3297 )
3298 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3299 # Trying to constrain by all dataset types is an error.
3300 with self.assertRaises(TypeError):
3301 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3303 def testSkyPixDatasetQueries(self):
3304 """Test that we can build queries involving skypix dimensions as long
3305 as a dataset type that uses those dimensions is included.
3306 """
3307 registry = self.makeRegistry()
3308 self.loadData(registry, "base.yaml")
3309 dataset_type = DatasetType(
3310 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3311 )
3312 registry.registerDatasetType(dataset_type)
3313 run = "r"
3314 registry.registerRun(run)
3315 # First try queries where there are no datasets; the concern is whether
3316 # we can even build and execute these queries without raising, even
3317 # when "doomed" query shortcuts are in play.
3318 self.assertFalse(
3319 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3320 )
3321 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3322 # Now add a dataset and see that we can get it back.
3323 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3324 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3325 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3326 self.assertEqual(
3327 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3328 {data_id},
3329 )
3330 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
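# Illustrative sketch (not part of the original suite): the sphgeom calls
# used above, with lsst.sphgeom assumed importable. universe() yields the
# valid pixel-index ranges and pixel(i) returns one trixel's sky region.
import lsst.sphgeom

_htm7 = lsst.sphgeom.HtmPixelization(7)
_begin, _end = next(iter(_htm7.universe()))  # first (begin, end) index range
_region = _htm7.pixel(_begin)  # spherical region for that HTM trixel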
3332 def testDatasetIdFactory(self):
3333 """Simple test for DatasetIdFactory, mostly to catch potential changes
3334 in its API.
3335 """
3336 registry = self.makeRegistry()
3337 factory = DatasetIdFactory()
3338 dataset_type = DatasetType(
3339 "datasetType",
3340 dimensions=["detector", "instrument"],
3341 universe=registry.dimensions,
3342 storageClass="int",
3343 )
3344 run = "run"
3345 data_id = DataCoordinate.standardize(
3346 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
3347 )
3349 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3350 self.assertIsInstance(datasetId, uuid.UUID)
3351 self.assertEqual(datasetId.version, 4)
3353 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3354 self.assertIsInstance(datasetId, uuid.UUID)
3355 self.assertEqual(datasetId.version, 5)
3357 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3358 self.assertIsInstance(datasetId, uuid.UUID)
3359 self.assertEqual(datasetId.version, 5)
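# Illustrative sketch (not part of the original suite): the contract checked
# above in practice. UNIQUE yields random version-4 UUIDs; the DATAID_TYPE*
# modes yield deterministic version-5 UUIDs, so repeating the call with the
# same inputs reproduces the same ID.
def _show_id_determinism(factory, run, dataset_type, data_id):
    id_a = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    id_b = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
    assert id_a == id_b  # deterministic: safe to recompute
    # A fresh UNIQUE ID will (virtually) never collide with it.
    assert factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE) != id_a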
3361 def testExposureQueries(self):
3362 """Test query methods using arguments sourced from the exposure log
3363 service.
3365 The most complete test dataset currently available to daf_butler tests
3366 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3367 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3368 dimension records as it was focused on providing nontrivial spatial
3369 overlaps between visit+detector and tract+patch. So in this test we
3370 need to translate queries that originally used the exposure dimension
3371 to use the (very similar) visit dimension instead.
3372 """
3373 registry = self.makeRegistry()
3374 self.loadData(registry, "hsc-rc2-subset.yaml")
3375 self.assertEqual(
3376 [
3377 record.id
3378 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3379 .order_by("id")
3380 .limit(5)
3381 ],
3382 [318, 322, 326, 330, 332],
3383 )
3384 self.assertEqual(
3385 [
3386 data_id["visit"]
3387 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
3388 ],
3389 [318, 322, 326, 330, 332],
3390 )
3391 self.assertEqual(
3392 [
3393 record.id
3394 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3395 .order_by("full_name")
3396 .limit(5)
3397 ],
3398 [73, 72, 71, 70, 65],
3399 )
3400 self.assertEqual(
3401 [
3402 data_id["detector"]
3403 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3404 .order_by("full_name")
3405 .limit(5)
3406 ],
3407 [73, 72, 71, 70, 65],
3408 )
3410 def test_long_query_names(self) -> None:
3411 """Test that queries involving very long names are handled correctly.
3413 This is especially important for PostgreSQL, which truncates identifiers
3414 longer than 63 chars, but it's worth testing for all DBs.
3415 """
3416 registry = self.makeRegistry()
3417 name = "abcd" * 17
3418 registry.registerDatasetType(
3419 DatasetType(
3420 name,
3421 dimensions=(),
3422 storageClass="Exposure",
3423 universe=registry.dimensions,
3424 )
3425 )
3426 # Need to search more than one collection actually containing a
3427 # matching dataset to avoid optimizations that sidestep bugs due to
3428 # truncation by making findFirst=True a no-op.
3429 run1 = "run1"
3430 registry.registerRun(run1)
3431 run2 = "run2"
3432 registry.registerRun(run2)
3433 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
3434 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
3435 self.assertEqual(
3436 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3437 {ref1},
3438 )
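# Illustrative sketch (not part of the original suite): why "abcd" * 17
# exercises the truncation path. PostgreSQL's default identifier limit is
# 63 bytes (NAMEDATALEN 64, minus the terminator).
_name = "abcd" * 17
assert len(_name) == 68  # longer than PostgreSQL's 63-byte identifier limit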
3440 def test_skypix_constraint_queries(self) -> None:
3441 """Test queries spatially constrained by a skypix data ID."""
3442 registry = self.makeRegistry()
3443 self.loadData(registry, "hsc-rc2-subset.yaml")
3444 patch_regions = {
3445 (data_id["tract"], data_id["patch"]): data_id.region
3446 for data_id in registry.queryDataIds(["patch"]).expanded()
3447 }
3448 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3449 # This check ensures the test doesn't become trivial due to a config
3450 # change; if it does, just pick a different HTM level.
3451 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3452 # Gather all skypix IDs that definitely overlap at least one of these
3453 # patches.
3454 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3455 for patch_region in patch_regions.values():
3456 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3457 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3458 # and does not overlap at least one other patch.
3459 for skypix_id in itertools.chain.from_iterable(
3460 range(begin, end) for begin, end in relevant_skypix_ids
3461 ):
3462 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3463 overlapping_patches = {
3464 patch_key
3465 for patch_key, patch_region in patch_regions.items()
3466 if not patch_region.isDisjointFrom(skypix_region)
3467 }
3468 if overlapping_patches and overlapping_patches != patch_regions.keys():
3469 break
3470 else:
3471 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3472 self.assertEqual(
3473 {
3474 (data_id["tract"], data_id["patch"])
3475 for data_id in registry.queryDataIds(
3476 ["patch"],
3477 dataId={skypix_dimension.name: skypix_id},
3478 )
3479 },
3480 overlapping_patches,
3481 )
3482 # Test that a three-way join that includes the common skypix system in
3483 # the dimensions doesn't generate redundant join terms in the query.
3484 full_data_ids = set(
3485 registry.queryDataIds(
3486 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3487 ).expanded()
3488 )
3489 self.assertGreater(len(full_data_ids), 0)
3490 for data_id in full_data_ids:
3491 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3492 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3494 def test_spatial_constraint_queries(self) -> None:
3495 """Test queries in which one spatial dimension in the constraint (data
3496 ID or ``where`` string) constrains a different spatial dimension in the
3497 query result columns.
3498 """
3499 registry = self.makeRegistry()
3500 self.loadData(registry, "hsc-rc2-subset.yaml")
3501 patch_regions = {
3502 (data_id["tract"], data_id["patch"]): data_id.region
3503 for data_id in registry.queryDataIds(["patch"]).expanded()
3504 }
3505 observation_regions = {
3506 (data_id["visit"], data_id["detector"]): data_id.region
3507 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3508 }
3509 all_combos = {
3510 (patch_key, observation_key)
3511 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3512 }
3513 overlapping_combos = {
3514 (patch_key, observation_key)
3515 for patch_key, observation_key in all_combos
3516 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3517 }
3518 # Check a direct spatial join with no constraint first.
3519 self.assertEqual(
3520 {
3521 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3522 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3523 },
3524 overlapping_combos,
3525 )
3526 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3527 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3528 for patch_key, observation_key in overlapping_combos:
3529 overlaps_by_patch[patch_key].add(observation_key)
3530 overlaps_by_observation[observation_key].add(patch_key)
3531 # Find patches and observations that each overlap at least one of the
3532 # other kind, but not all of them.
3533 nontrivial_patch = next(
3534 iter(
3535 patch_key
3536 for patch_key, observation_keys in overlaps_by_patch.items()
3537 if observation_keys and observation_keys != observation_regions.keys()
3538 )
3539 )
3540 nontrivial_observation = next(
3541 iter(
3542 observation_key
3543 for observation_key, patch_keys in overlaps_by_observation.items()
3544 if patch_keys and patch_keys != patch_regions.keys()
3545 )
3546 )
3547 # Use the nontrivial patches and observations as constraints on the
3548 # other dimensions in various ways, first via a 'where' expression.
3549 # It's better in general to use 'bind' instead of f-strings, but these
3550 # are all integers so there are no quoting concerns.
3551 self.assertEqual(
3552 {
3553 (data_id["visit"], data_id["detector"])
3554 for data_id in registry.queryDataIds(
3555 ["visit", "detector"],
3556 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3557 skymap="hsc_rings_v1",
3558 )
3559 },
3560 overlaps_by_patch[nontrivial_patch],
3561 )
3562 self.assertEqual(
3563 {
3564 (data_id["tract"], data_id["patch"])
3565 for data_id in registry.queryDataIds(
3566 ["patch"],
3567 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3568 instrument="HSC",
3569 )
3570 },
3571 overlaps_by_observation[nontrivial_observation],
3572 )
3573 # and then via the dataId argument.
3574 self.assertEqual(
3575 {
3576 (data_id["visit"], data_id["detector"])
3577 for data_id in registry.queryDataIds(
3578 ["visit", "detector"],
3579 dataId={
3580 "tract": nontrivial_patch[0],
3581 "patch": nontrivial_patch[1],
3582 },
3583 skymap="hsc_rings_v1",
3584 )
3585 },
3586 overlaps_by_patch[nontrivial_patch],
3587 )
3588 self.assertEqual(
3589 {
3590 (data_id["tract"], data_id["patch"])
3591 for data_id in registry.queryDataIds(
3592 ["patch"],
3593 dataId={
3594 "visit": nontrivial_observation[0],
3595 "detector": nontrivial_observation[1],
3596 },
3597 instrument="HSC",
3598 )
3599 },
3600 overlaps_by_observation[nontrivial_observation],
3601 )
3603 def test_query_projection_drop_postprocessing(self) -> None:
3604 """Test that projections and deduplications on query objects can
3605 drop post-query region filtering to ensure the query remains in
3606 the SQL engine.
3607 """
3608 registry = self.makeRegistry()
3609 self.loadData(registry, "base.yaml")
3610 self.loadData(registry, "spatial.yaml")
3612 def pop_transfer(tree: Relation) -> Relation:
3613 """If a relation tree terminates with a transfer to a new engine,
3614 return the relation prior to that transfer. If not, return the
3615 original relation.
3617 Parameters
3618 ----------
3619 tree : `Relation`
3620 The relation tree to inspect.
3621 """
3622 match tree:
3623 case Transfer(target=target):
3624 return target
3625 case _:
3626 return tree
3628 # There's no public way to get a Query object yet, so we get one from a
3629 # DataCoordinateQueryResults private attribute. When a public API is
3630 # available this test should use it.
3631 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3632 # We expect this query to terminate in the iteration engine originally,
3633 # because region-filtering is necessary.
3634 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3635 # If we deduplicate, we usually have to do that downstream of the
3636 # filtering. That means the deduplication has to happen in the
3637 # iteration engine.
3638 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3639 # If we pass drop_postprocessing, we instead drop the region filtering
3640 # so the deduplication can happen in SQL (though there might still be
3641 # transfer to iteration at the tail of the tree that we can ignore;
3642 # that's what the pop_transfer takes care of here).
3643 self.assertIsInstance(
3644 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3645 sql.Engine,
3646 )
3648 def test_query_find_datasets_drop_postprocessing(self) -> None:
3649 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3650 problems with the FindFirstDataset relation operation.
3651 """
3652 # Setup: load some visit, tract, and patch records, and insert two
3653 # datasets with dimensions {visit, patch}, with one in each of two
3654 # RUN collections.
3655 registry = self.makeRegistry()
3656 self.loadData(registry, "base.yaml")
3657 self.loadData(registry, "spatial.yaml")
3658 storage_class = StorageClass("Warpy")
3659 registry.storageClasses.registerStorageClass(storage_class)
3660 dataset_type = DatasetType(
3661 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3662 )
3663 registry.registerDatasetType(dataset_type)
3664 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3665 registry.registerRun("run1")
3666 registry.registerRun("run2")
3667 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3668 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3669 # Query for the dataset using queryDataIds(...).findDatasets(...)
3670 # against only one of the two collections. This should work even
3671 # though the relation returned by queryDataIds ends with
3672 # iteration-engine region-filtering, because we can recognize before
3673 # running the query that there is only one collection to search and
3674 # hence the (default) findFirst=True is irrelevant, and joining in the
3675 # dataset query commutes past the iteration-engine postprocessing.
3676 query1 = registry.queryDataIds(
3677 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3678 )
3679 self.assertEqual(
3680 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3681 {ref1},
3682 )
3683 # Query for the dataset using queryDataIds(...).findDatasets(...)
3684 # against both collections. This can only work if the FindFirstDataset
3685 # operation can be commuted past the iteration-engine postprocessing into SQL.
3686 query2 = registry.queryDataIds(
3687 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3688 )
3689 self.assertEqual(
3690 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3691 {ref2},
3692 )
3694 def test_query_empty_collections(self) -> None:
3695 """Test for registry query methods with empty collections. The methods
3696 should return empty result set (or None when applicable) and provide
3697 "doomed" diagnostics.
3698 """
3699 registry = self.makeRegistry()
3700 self.loadData(registry, "base.yaml")
3701 self.loadData(registry, "datasets.yaml")
3703 # Tests for registry.findDataset()
3704 with self.assertRaises(NoDefaultCollectionError):
3705 registry.findDataset("bias", instrument="Cam1", detector=1)
3706 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3707 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3709 # Tests for registry.queryDatasets()
3710 with self.assertRaises(NoDefaultCollectionError):
3711 registry.queryDatasets("bias")
3712 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3714 result = registry.queryDatasets("bias", collections=[])
3715 self.assertEqual(len(list(result)), 0)
3716 messages = list(result.explain_no_results())
3717 self.assertTrue(messages)
3718 self.assertTrue(any("because collection list is empty" in message for message in messages))
3720 # Tests for registry.queryDataIds()
3721 with self.assertRaises(NoDefaultCollectionError):
3722 registry.queryDataIds("detector", datasets="bias")
3723 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3725 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3726 self.assertEqual(len(list(result)), 0)
3727 messages = list(result.explain_no_results())
3728 self.assertTrue(messages)
3729 self.assertTrue(any("because collection list is empty" in message for message in messages))
3731 # Tests for registry.queryDimensionRecords()
3732 with self.assertRaises(NoDefaultCollectionError):
3733 registry.queryDimensionRecords("detector", datasets="bias")
3734 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3736 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3737 self.assertEqual(len(list(result)), 0)
3738 messages = list(result.explain_no_results())
3739 self.assertTrue(messages)
3740 self.assertTrue(any("because collection list is empty" in message for message in messages))
3742 def test_dataset_followup_spatial_joins(self) -> None:
3743 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3744 is involved.
3745 """
3746 registry = self.makeRegistry()
3747 self.loadData(registry, "base.yaml")
3748 self.loadData(registry, "spatial.yaml")
3749 pvi_dataset_type = DatasetType(
3750 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3751 )
3752 registry.registerDatasetType(pvi_dataset_type)
3753 collection = "datasets"
3754 registry.registerRun(collection)
3755 (pvi1,) = registry.insertDatasets(
3756 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3757 )
3758 (pvi2,) = registry.insertDatasets(
3759 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3760 )
3761 (pvi3,) = registry.insertDatasets(
3762 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3763 )
3764 self.assertEqual(
3765 set(
3766 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3767 .expanded()
3768 .findRelatedDatasets("pvi", [collection])
3769 ),
3770 {
3771 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3772 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3773 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3774 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3775 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3776 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3777 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3778 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3779 },
3780 )