Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 6%
1562 statements
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import time
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Callable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta
from threading import Barrier

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._exceptions import (
    CollectionTypeError,
    DataIdValueError,
    InconsistentDataIdError,
    InvalidQueryError,
    MissingCollectionError,
    MissingDatasetTypeError,
)
from ..._exceptions_legacy import DatasetTypeError
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    ConflictingDefinitionError,
    DatasetTypeExpressionError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from .._registry import Registry
from ..interfaces import ButlerAttributeExistsError
from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class; if a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    supportsDetailedQueryExplain: bool = True
    """True if the registry class being tested can generate detailed
    explanations for queries that return no rows, by running additional
    queries to diagnose the problem.
    """

    supportsQueryOffset: bool = True
    """True if the registry class being tested supports the 'offset'
    parameter to query methods.
    """

    supportsQueryGovernorValidation: bool = True
    """True if the registry class being tested validates that values provided
    by the user for governor dimensions are correct before running queries.
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need entirely default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if ``share_repo_with``
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
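
    # A minimal sketch of how a concrete subclass might wire these hooks up,
    # assuming a SQLite-backed registry; the class name, data-directory
    # layout, and createFromConfig call here are illustrative, not part of
    # this suite:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 # An in-memory SQLite DB cannot be shared between two
    #                 # registries, so report the argument as unsupported.
    #                 return None
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return SqlRegistry.createFromConfig(config)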

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite says the limit is 32k, but it looks
        # like it is much higher in practice.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size; the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
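
        # For reference, the batching exercised above can be pictured roughly
        # like this (a sketch, not the actual implementation; the 1k batch
        # size comes from the comment above, and the sort/de-duplication is
        # what the duplicate-handling assertions rely on):
        #
        #     def in_clause_batches(values, batch_size=1000):
        #         unique = sorted(set(values))
        #         for i in range(0, len(unique), batch_size):
        #             yield unique[i : i + batch_size]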
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

        # Test some basic queryDatasetTypes functionality
        missing: list[str] = []
        types = registry.queryDatasetTypes(["te*", "notarealdatasettype"], missing=missing)
        self.assertCountEqual([dt.name for dt in types], ["test", "testNoneTemplate"])
        self.assertEqual(missing, ["notarealdatasettype"])

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            (
                "visit",
                {
                    "instrument": "DummyCam",
                    "id": 42,
                    "name": "fortytwo",
                    "physical_filter": "d-r",
                    "day_obs": 20250101,
                },
            ),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20250101},
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group1"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group2"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present, or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)
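
                # Both modes derive a deterministic version-5 UUID by hashing
                # the dataset type and data ID (plus the run name for
                # DATAID_TYPE_RUN) into a fixed namespace, which is why
                # re-creating the same ref yields the same ID.  Roughly (a
                # sketch, not the actual implementation; the namespace
                # constant here is illustrative):
                #
                #     uuid.uuid5(NS_UUID, f"{datasetType.name}/{dataId}/{run}")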

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by registry; this test checks
        that such lookups now fail.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempting to set its child collections to something circular
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

        if self.supportsCollectionRegex:
            # Query for collections matching a regex.
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
            # Query for collections matching a regex or an explicit str.
            self.assertCountEqual(
                list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
                ["imported_r", "imported_g", "chain1"],
            )
        # Same queries as the regex ones above, but using globs instead of
        # regex.
        self.assertCountEqual(
            list(registry.queryCollections("imported_*", flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a glob or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )

        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainCaching(self):
        registry = self.makeRegistry()
        with registry.caching_context():
            registry.registerCollection("a")
            registry.registerCollection("chain", CollectionType.CHAINED)
            # There used to be a caching bug (DM-43750) that would throw an
            # exception if you modified a collection chain for a collection
            # that was already in the cache.
            registry.setCollectionChain("chain", ["a"])
            self.assertEqual(list(registry.getCollectionChain("chain")), ["a"])

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testCollectionChainPrependConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after it has decided on positions
            # for the new children in the collection chain, but before
            # inserting them.
            registry._managers.collections.prepend_collection_chain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry._managers.collections.prepend_collection_chain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first, inserting "a".
        # unblocked_thread_func should have finished second, prepending "b".
        self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))

    def testCollectionChainReplaceConcurrency(self):
        """Verify that locking via database row locks is working as
        expected.
        """

        def blocked_thread_func(registry: SqlRegistry):
            # This call will become blocked after deleting children, but
            # before inserting new ones.
            registry.setCollectionChain("chain", ["a"])

        def unblocked_thread_func(registry: SqlRegistry):
            registry.setCollectionChain("chain", ["b"])

        registry = self._do_collection_concurrency_test(blocked_thread_func, unblocked_thread_func)

        # blocked_thread_func should have finished first.
        # unblocked_thread_func should have finished second, overwriting the
        # chain with "b".
        self.assertEqual(("b",), registry.getCollectionChain("chain"))

    def _do_collection_concurrency_test(
        self,
        blocked_thread_func: Callable[[SqlRegistry], None],
        unblocked_thread_func: Callable[[SqlRegistry], None],
    ) -> SqlRegistry:
        # This function:
        # 1. Sets up two registries pointing at the same database.
        # 2. Starts running 'blocked_thread_func' in a background thread,
        #    arranging for it to become blocked during a critical section in
        #    the collections manager.
        # 3. Waits for 'blocked_thread_func' to reach the critical section.
        # 4. Starts running 'unblocked_thread_func'.
        # 5. Allows both functions to run to completion.

        # Set up two registries pointing to the same DB
        registry1 = self.makeRegistry()
        assert isinstance(registry1, SqlRegistry)
        registry2 = self.makeRegistry(share_repo_with=registry1)
        if registry2 is None:
            # This will happen for in-memory SQL databases.
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

        registry1.registerCollection("chain", CollectionType.CHAINED)
        for collection in ["a", "b"]:
            registry1.registerCollection(collection)

        # Arrange for registry1 to block during its critical section, allowing
        # us to detect this and control when it becomes unblocked.
        enter_barrier = Barrier(2, timeout=60)
        exit_barrier = Barrier(2, timeout=60)

        def wait_for_barrier():
            enter_barrier.wait()
            exit_barrier.wait()

        registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

        with ThreadPoolExecutor(max_workers=1) as exec1:
            with ThreadPoolExecutor(max_workers=1) as exec2:
                future1 = exec1.submit(blocked_thread_func, registry1)
                enter_barrier.wait()

                # At this point registry 1 has entered the critical section
                # and is waiting for us to release it.  Start the other
                # thread.
                future2 = exec2.submit(unblocked_thread_func, registry2)
                # thread2 should block inside a database call, but we have no
                # way to detect when it is in this state.
                time.sleep(0.200)

                # Let the threads run to completion.
                exit_barrier.wait()
                future1.result()
                future2.result()

        return registry1

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not of the original insertion in the
                    # outer block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
        )
        registry.insertDimensionData(
            "group",
            dict(instrument="DummyCam", name="ten"),
            dict(instrument="DummyCam", name="eleven"),
            dict(instrument="DummyCam", name="twelve"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(
                instrument="DummyCam",
                id=100,
                obs_id="100",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=101,
                obs_id="101",
                physical_filter="dummy_i",
                group="ten",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=110,
                obs_id="110",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=111,
                obs_id="111",
                physical_filter="dummy_r",
                group="eleven",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=200,
                obs_id="200",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
            dict(
                instrument="DummyCam",
                id=201,
                obs_id="201",
                physical_filter="dummy_r",
                group="twelve",
                day_obs=20250101,
            ),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # Dataset types and collections.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types and a run collection.
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
        # the TopologicalFamily they belong to. We'll relate all elements in
        # each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do anything
        # useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager."""
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 2 records per manager, plus the config for
        # dimensions.
        VERSION_COUNT = 6 * 2 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # Check what get() returns for a non-existent key.
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Cannot store an empty key or value.
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # Set the value of a non-existent key.
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # Updating the value of an existing key requires force=True.
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # Delete an existing key.
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Deleting a non-existent key returns False.
        self.assertFalse(attributes.delete("non-attr"))

        # Store a bunch of keys and get the list back.
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)
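
        # For illustration, the set()/force semantics exercised above match
        # this minimal dict-backed sketch (_AttrSketch is an assumed
        # stand-in, not the real manager class):
        class _AttrSketch:
            def __init__(self):
                self._data = {}

            def set(self, key, value, *, force=False):
                if not key or not value:
                    raise ValueError("empty keys and values are rejected")
                if key in self._data and not force:
                    raise ButlerAttributeExistsError(f"attribute {key} already exists")
                self._data[key] = value

        sketch = _AttrSketch()
        sketch.set("attr", "value")
        with self.assertRaises(ButlerAttributeExistsError):
            sketch.set("attr", "value2")
        sketch.set("attr", "value2", force=True)
        self.assertEqual(sketch._data["attr"], "value2")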

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ],
        )
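
        # For illustration, the findFirst rule above amounts to keeping the
        # first match per data ID while walking the collections in the order
        # given; a minimal pure-Python sketch of that rule (the helper and
        # its toy inputs are hypothetical, not the registry implementation):
        def _find_first_sketch(refs_by_collection, collections):
            chosen = {}
            for collection in collections:
                for data_id, ref in refs_by_collection[collection].items():
                    # setdefault keeps the ref from the earliest collection.
                    chosen.setdefault(data_id, ref)
            return chosen

        sketch = _find_first_sketch(
            {
                "imported_g": {1: "bias/g/1", 2: "bias/g/2", 3: "bias/g/3"},
                "imported_r": {2: "bias/r/2", 3: "bias/r/3", 4: "bias/r/4"},
            },
            ["imported_r", "imported_g"],
        )
        self.assertEqual(sketch, {1: "bias/g/1", 2: "bias/r/2", 3: "bias/r/3", 4: "bias/r/4"})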

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here. That includes:
        # - the dimensions of the data IDs we want to query:
        expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expected_subset_dimensions = registry.dimensions.conform(["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(
                    instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
                )
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            dimensions=expected_dimensions,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(
                flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
            ),
            registry.findDataset(
                flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
            ),
            registry.findDataset(
                flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
            ),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.dimensions, expected_dimensions)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
        self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
            expectedAllBiases,
        )
        self.assertCountEqual(
            list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
            expectedDeduplicatedBiases,
        )

        # Searching for a dataset with dimensions we had projected away
        # restores those dimensions.
        self.assertCountEqual(
            list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
            expectedFlats,
        )

        # Use a named dataset type that does not exist and a dataset type
        # object that does not exist.
        unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")

        # Test both string name and dataset type object.
        test_type: str | DatasetType
        for test_type, test_type_name in (
            (unknown_type, unknown_type.name),
            (unknown_type.name, unknown_type.name),
        ):
            with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
                list(
                    subsetDataIds.findDatasets(
                        test_type, collections=["imported_r", "imported_g"], findFirst=True
                    )
                )

        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(
            bias, collections=["imported_r", "imported_g"], findFirst=False
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(
            bias, collections=["imported_r", "imported_g"], findFirst=True
        ).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias, collections=["imported_r", "imported_g"], findFirst=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(
                bias, collections=["imported_r", "imported_g"], findFirst=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(
                bias, collections=["imported_r", "imported_g"], findFirst=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.dimensions, expected_dimensions)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
            self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias, collections=["imported_r", "imported_g"], findFirst=False
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(
                bias, collections=["imported_r", "imported_g"], findFirst=False
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(
                bias, collections=["imported_r", "imported_g"], findFirst=True
            ).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias, collections=["imported_r", "imported_g"], findFirst=False
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias, collections=["imported_r", "imported_g"], findFirst=True
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(
                    bias, collections=["imported_r", "imported_g"], findFirst=False
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(
                    bias, collections=["imported_r", "imported_g"], findFirst=True
                ).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testStorageClassPropagation(self):
        """Test that queries for datasets respect the storage class passed in
        as part of a full dataset type.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dataset_type_in_registry = DatasetType(
            "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
        )
        registry.registerDatasetType(dataset_type_in_registry)
        run = "run1"
        registry.registerRun(run)
        (inserted_ref,) = registry.insertDatasets(
            dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
        )
        self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
        query_dataset_type = DatasetType(
            "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
        )
        self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
        query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
        self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type)  # type: ignore
        (query_datasets_ref,) = query_datasets_result
        self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
        query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
            query_dataset_type, collections=[run]
        )
        self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
        (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
        self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
        query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
        self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
        find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
        self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.make_empty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
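        # An empty data ID carries no required dimension values, so both runs
        # hold a dataset with the same (empty) data ID; a quick illustrative
        # check of that setup:
        self.assertEqual(dict(dataId.required), {})
        self.assertEqual(dataset1.dataId, dataset2.dataId)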
        # Query directly for both of the datasets, and each one, one at a time.
        self.checkQueryResults(
            registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
        )
        self.checkQueryResults(
            registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.checkQueryResults(dataIds, [dataId])
        # Use queried data IDs to find the datasets.
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
            [dataset1, dataset2],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.checkQueryResults(dataIds, [dataId])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                [dataset1],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.checkQueryResults(dataIds, [dataId])
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
            [dataset1, dataset2],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.checkQueryResults(dataIds, [dataId])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
                [dataset1, dataset2],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                [dataset1],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one. Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.checkQueryResults(dataIds, [dataId])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
                [dataset1, dataset2],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                [dataset1],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.checkQueryResults(dataIds, [dataId])
                self.checkQueryResults(
                    dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
                    [dataset1, dataset2],
                )
                self.checkQueryResults(
                    dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                    [dataset1],
                )
                self.checkQueryResults(
                    dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                    [dataset2],
                )
        # Repeat the materialization tests with a dimension element that isn't
        # cached, so there's no way we can know, when building the query,
        # whether there are any rows or not (there aren't).
        dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
        with dataIds.materialize() as dataIds:
            self.checkQueryResults(dataIds, [])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
            )
            self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
            self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
        # Query for non-empty data IDs with a constraint on an empty-data-ID
        # dataset that exists.
        dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
        self.checkQueryResults(
            dataIds.subset(unique=True),
            [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
        )
        # Again query for non-empty data IDs with a constraint on empty-data-ID
        # datasets, but when the datasets don't exist. We delete the existing
        # dataset and query just that collection rather than creating a new
        # empty collection, because this is a bit less likely for our
        # build-time logic to shortcut out (via the collection summaries), and
        # such a shortcut would make this test a bit more trivial than we'd
        # like.
        registry.removeDatasets([dataset2])
        dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
        self.checkQueryResults(dataIds, [])

    def testDimensionDataModifications(self):
        """Test that modifying dimension records via
        syncDimensionData(..., update=True) and
        insertDimensionData(..., replace=True) works as expected, even in the
        presence of datasets using those dimensions and spatial overlap
        relationships.
        """

        def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
            """Unpack a sphgeom.RangeSet into the integers it contains."""
            for begin, end in ranges:
                yield from range(begin, end)

        def _range_set_hull(
            ranges: lsst.sphgeom.RangeSet,
            pixelization: lsst.sphgeom.HtmPixelization,
        ) -> lsst.sphgeom.ConvexPolygon:
            """Create a ConvexPolygon hull of the region defined by a set of
            HTM pixelization index ranges.
            """
            points = []
            for index in _unpack_range_set(ranges):
                points.extend(pixelization.triangle(index).getVertices())
            return lsst.sphgeom.ConvexPolygon(points)
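
        # Quick sanity check of the helper on a literal RangeSet, assuming
        # (as the code below does) that RangeSet(n) holds the single index n:
        assert list(_unpack_range_set(lsst.sphgeom.RangeSet(5))) == [5]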

        # Use HTM to set up an initial parent region (one arbitrary trixel)
        # and four child regions (the trixels within the parent at the next
        # level). We'll use the parent as a tract/visit region and the
        # children as its patch/visit_detector regions.
        registry = self.makeRegistry()
        htm6 = registry.dimensions.skypix["htm"][6].pixelization
        commonSkyPix = registry.dimensions.commonSkyPix.pixelization
        index = 12288
        child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
        assert htm6.universe().contains(child_ranges_small)
        child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
        parent_region_small = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
        )
        assert all(parent_region_small.contains(c) for c in child_regions_small)
        # Make a larger version of each child region, defined to be the set of
        # htm6 trixels that overlap the original's bounding circle. Make a new
        # parent that's the convex hull of the new children.
        child_regions_large = [
            _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
        ]
        assert all(
            large.contains(small)
            for large, small in zip(child_regions_large, child_regions_small, strict=True)
        )
        parent_region_large = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
        )
        assert all(parent_region_large.contains(c) for c in child_regions_large)
        assert parent_region_large.contains(parent_region_small)
        assert not parent_region_small.contains(parent_region_large)
        assert not all(parent_region_small.contains(c) for c in child_regions_large)
        # Find some commonSkyPix indices that overlap the large regions but do
        # not overlap the small regions. We use commonSkyPix here to make sure
        # the real tests later involve what's in the database, not just
        # post-query filtering of regions.
        child_difference_indices = []
        for large, small in zip(child_regions_large, child_regions_small, strict=True):
            difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
            assert difference, "if this is empty, we can't test anything useful with these regions"
            assert all(
                not commonSkyPix.triangle(d).isDisjointFrom(large)
                and commonSkyPix.triangle(d).isDisjointFrom(small)
                for d in difference
            )
            child_difference_indices.append(difference)
        parent_difference_indices = list(
            _unpack_range_set(
                commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
            )
        )
        assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
        assert all(
            (
                not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
                and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
            )
            for d in parent_difference_indices
        )
        # Now that we've finally got those regions, we'll insert the large ones
        # as tract/patch dimension records.
        skymap_name = "testing_v1"
        registry.insertDimensionData(
            "skymap",
            {
                "name": skymap_name,
                "hash": bytes([42]),
                "tract_max": 1,
                "patch_nx_max": 2,
                "patch_ny_max": 2,
            },
        )
        registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
        registry.insertDimensionData(
            "patch",
            *[
                {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
                for n, c in enumerate(child_regions_large)
            ],
        )
        # Add a dataset that uses these dimensions to make sure that modifying
        # them doesn't disrupt foreign keys (we need to make sure the DB
        # doesn't implement insert with replace=True as delete-then-insert).
        dataset_type = DatasetType(
            "coadd",
            dimensions=["tract", "patch"],
            universe=registry.dimensions,
            storageClass="Exposure",
        )
        registry.registerDatasetType(dataset_type)
        registry.registerCollection("the_run", CollectionType.RUN)
        registry.insertDatasets(
            dataset_type,
            [{"skymap": skymap_name, "tract": 0, "patch": 2}],
            run="the_run",
        )
        # Query for tracts and patches that overlap some "difference"
        # commonSkyPix pixels; there should be overlaps, because the database
        # has the "large" suite of regions.
        self.assertEqual(
            {0},
            {
                data_id["tract"]
                for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            },
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"]
                    for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                },
            )
        # Use sync to update the tract region and insert to update the patch
        # regions, switching to the "small" suite.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_small},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_large})
        registry.insertDimensionData(
            "patch",
            *[
                {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
                for n, c in enumerate(child_regions_small)
            ],
            replace=True,
        )
        # Query again; there should now be no such overlaps, because the
        # database has the "small" suite of regions.
        self.assertFalse(
            set(
                registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            )
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertNotIn(
                patch_id,
                {
                    data_id["patch"]
                    for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                },
            )
        # Update back to the large regions and query one more time.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_small})
        registry.insertDimensionData(
            "patch",
            *[
                {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
                for n, c in enumerate(child_regions_large)
            ],
            replace=True,
        )
        self.assertEqual(
            {0},
            {
                data_id["tract"]
                for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            },
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"]
                    for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                },
            )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `SqlRegistry.certify`, `SqlRegistry.decertify`,
        `SqlRegistry.findDataset`, and
        `DataCoordinateQueryResults.findRelatedDatasets`.
        """
        # Setup - make a Registry, fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
        t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
        # Insert some exposure records with timespans between each sequential
        # pair of those.
        registry.insertDimensionData(
            "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
        )
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "name": "group0"},
            {"instrument": "Cam1", "name": "group1"},
            {"instrument": "Cam1", "name": "group2"},
            {"instrument": "Cam1", "name": "group3"},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 0,
                "group": "group0",
                "obs_id": "zero",
                "physical_filter": "Cam1-G",
                "day_obs": 20200101,
                "timespan": Timespan(t1, t2),
            },
            {
                "instrument": "Cam1",
                "id": 1,
                "group": "group1",
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "day_obs": 20200101,
                "timespan": Timespan(t2, t3),
            },
            {
                "instrument": "Cam1",
                "id": 2,
                "group": "group2",
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "day_obs": 20200101,
                "timespan": Timespan(t3, t4),
            },
            {
                "instrument": "Cam1",
                "id": 3,
                "group": "group3",
                "obs_id": "three",
                "physical_filter": "Cam1-G",
                "day_obs": 20200101,
                "timespan": Timespan(t4, t5),
            },
        )
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(CollectionTypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # Test that we can query for this dataset via the new collection, both
        # on its own and with a RUN collection.
        self.assertEqual(
            set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
            {bias2a},
        )
        self.assertEqual(
            set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
            {
                bias2a,
                bias2b,
                bias3b,
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            },
        )
        self.assertEqual(
            set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
            {registry.expandDataId(instrument="Cam1", detector=2)},
        )
        self.assertEqual(
            set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
            {
                registry.expandDataId(instrument="Cam1", detector=2),
                registry.expandDataId(instrument="Cam1", detector=3),
                registry.expandDataId(instrument="Cam1", detector=4),
            },
        )
        self.assertEqual(
            set(
                registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
                    "bias", findFirst=True, collections=[collection]
                )
            ),
            {
                (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
                (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
            },
        )
        self.assertEqual(
            set(
                registry.queryDataIds(
                    ["exposure", "detector"], instrument="Cam1", detector=2
                ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
            ),
            {
                (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
                (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
                (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
                (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
            },
        )

        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
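
        # Each rejected certification above overlaps bias2a's [t2, t4)
        # validity range; the overlap logic can be spot-checked directly on
        # Timespan (an illustrative aside: ends are exclusive, so a span
        # ending exactly at t2 would not conflict):
        self.assertTrue(Timespan(begin=None, end=t3).overlaps(Timespan(begin=t2, end=t4)))
        self.assertFalse(Timespan(begin=None, end=t2).overlaps(Timespan(begin=t2, end=t4)))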
        # We should be able to certify 3a with a range overlapping that window,
        # because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ],
        )

        class Ambiguous:
            """Tag class to denote lookups that should be ambiguous."""

            pass

        def _assertLookup(
            detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
        ) -> None:
            """Local function that asserts that a bias lookup returns the given
            expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises((DatasetTypeError, LookupError)):
                    registry.findDataset(
                        "bias",
                        collections=collection,
                        instrument="Cam1",
                        detector=detector,
                        timespan=timespan,
                    )
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset(
                        "bias",
                        collections=collection,
                        instrument="Cam1",
                        detector=detector,
                        timespan=timespan,
                    ),
                )

        # Systematically test lookups against expected results.
        _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Test lookups via temporal joins to exposures.
        self.assertEqual(
            set(
                registry.queryDataIds(
                    ["exposure", "detector"], instrument="Cam1", detector=2
                ).findRelatedDatasets("bias", collections=[collection])
            ),
            {
                (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
                (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
                (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
            },
        )
        self.assertEqual(
            set(
                registry.queryDataIds(
                    ["exposure", "detector"], instrument="Cam1", detector=3
                ).findRelatedDatasets("bias", collections=[collection])
            ),
            {
                (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
                (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
                (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
            },
        )
        self.assertEqual(
            set(
                registry.queryDataIds(
                    ["exposure", "detector"], instrument="Cam1", detector=2
                ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
            ),
            {
                (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
                (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
                (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
                (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
            },
        )
        self.assertEqual(
            set(
                registry.queryDataIds(
                    ["exposure", "detector"], instrument="Cam1", detector=3
                ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
            ),
            {
                (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
                (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
                (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
                (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
            },
        )

        # Decertify [t3, t5) for all data IDs, and run the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection,
            "bias",
            Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ],
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                _assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # return those.
        registry.certify(
            collection,
            [bias2a, bias3a],
            Timespan(None, None),
        )
        for timespan in allTimespans:
            _assertLookup(detector=2, timespan=timespan, expected=bias2a)
            _assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2a over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(
            collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
        )
        for timespan in allTimespans:
            _assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            _assertLookup(detector=2, timespan=timespan, expected=expected)
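
        # For illustration, the split just tested is the generic "punch a
        # window out of an interval" operation; a sketch with plain tuples
        # standing in for timespans (None meaning unbounded; hypothetical
        # helper, not the registry implementation):
        def _decertify_sketch(interval, window):
            (begin, end), (w_begin, w_end) = interval, window
            pieces = []
            if w_begin is not None and (begin is None or begin < w_begin):
                pieces.append((begin, w_begin))  # surviving piece before the window
            if w_end is not None and (end is None or w_end < end):
                pieces.append((w_end, end))  # surviving piece after the window
            return pieces

        self.assertEqual(_decertify_sketch((None, None), (2, 4)), [(None, 2), (4, None)])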

    def testSkipCalibs(self):
        """Test how queries handle skipping of calibration collections."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        coll_calib = "Cam1/calibs/default"
        registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)

        # Add all biases to the calibration collection.
        # Without this, the logic that prunes dataset subqueries based on
        # datasetType-collection summary information will fire before the logic
        # we want to test below. This is a good thing (it avoids the dreaded
        # NotImplementedError a bit more often) everywhere but here.
        registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))

        coll_list = [coll_calib, "imported_g", "imported_r"]
        chain = "Cam1/chain"
        registry.registerCollection(chain, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain, coll_list)

        # An explicit collection list will raise if findFirst=True or there
        # are temporal dimensions.
        with self.assertRaises(NotImplementedError):
            registry.queryDatasets("bias", collections=coll_list, findFirst=True)
        with self.assertRaises(NotImplementedError):
            registry.queryDataIds(
                ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
            ).count()

        # A chained collection will skip the calibration collection.
        datasets = list(registry.queryDatasets("bias", collections=chain))
        self.assertGreater(len(datasets), 0)

        dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
        self.assertGreater(len(dataIds), 0)

        # A glob pattern will skip it too.
        datasets = list(registry.queryDatasets("bias", collections="*d*"))
        self.assertGreater(len(datasets), 0)

        # A regular expression will skip it too.
        pattern = re.compile(".*")
        datasets = list(registry.queryDatasets("bias", collections=pattern))
        self.assertGreater(len(datasets), 0)

        # Ellipsis should work as usual.
        datasets = list(registry.queryDatasets("bias", collections=...))
        self.assertGreater(len(datasets), 0)

        # A few tests with findFirst.
        datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
        self.assertGreater(len(datasets), 0)

    def testIngestTimeQuery(self):
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dt0 = datetime.datetime.now(datetime.UTC)
        self.loadData(registry, "datasets.yaml")
        dt1 = datetime.datetime.now(datetime.UTC)

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # no one will ever use this piece of software in 30 years
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

        # Check more exact timing to make sure there is no 37-second offset
        # (after fixing DM-30124). SQLite time precision is 1 second; make
        # sure that we don't test with higher precision.
        tests = [
            # format: (timestamp, operator, expected_len)
            (dt0 - timedelta(seconds=1), ">", len0),
            (dt0 - timedelta(seconds=1), "<", 0),
            (dt1 + timedelta(seconds=1), "<", len0),
            (dt1 + timedelta(seconds=1), ">", 0),
        ]
        for dt, op, expect_len in tests:
            dt_str = dt.isoformat(sep=" ")

            where = f"ingest_date {op} T'{dt_str}'"
            datasets = list(registry.queryDatasets(..., collections=..., where=where))
            self.assertEqual(len(datasets), expect_len)

            # Same, with bind using datetime or astropy Time.
            where = f"ingest_date {op} ingest_time"
            datasets = list(
                registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
            )
            self.assertEqual(len(datasets), expect_len)

            dt_astropy = astropy.time.Time(dt, format="datetime")
            datasets = list(
                registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
            )
            self.assertEqual(len(datasets), expect_len)

    def testTimespanQueries(self):
        """Test query expressions involving timespans."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids) * 0.1)
        i2 = int(len(ids) * 0.3)
        i3 = int(len(ids) * 0.6)
        i4 = int(len(ids) * 0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }
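
        # A minimal sanity check of the bound timespan itself, independent of
        # the query grammar: daf_butler timespans are half-open intervals,
        # [begin, end), so ts23 should contain t2 but not t3. (This assumes
        # Timespan.contains() accepts an astropy Time, which it does today.)
        self.assertTrue(bind["ts23"].contains(t2))
        self.assertFalse(bind["ts23"].contains(t3))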

        def query(where):
            """Return results as a sorted, deduplicated list of visit IDs.

            Parameters
            ----------
            where : `str`
                The WHERE clause for the query.
            """
            return sorted(
                {
                    dataId["visit"]
                    for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
                }
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, because i3 begins before the
        # exclusive end bound t3.
        self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries."""
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary()
        expected1.dataset_types.add(registry.getDatasetType("bias"))
        expected1.add_data_ids(
            flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(
            calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
        )
        expected2 = expected1.copy()
        expected2.dataset_types.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling SqlRegistry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)

    def testBindInQueryDatasets(self):
        """Test that the bind parameter is correctly forwarded in
        queryDatasets recursion.
        """
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertEqual(
            set(registry.queryDatasets("flat", band="r", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
        )

    def testQueryIntRangeExpressions(self):
        """Test integer range expressions in ``where`` arguments.

        Note that our expressions use inclusive stop values, unlike Python's.
        """
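        # For reference: "detector IN (1..4:2)" is the range from 1 to 4
        # inclusive with stride 2, i.e. detectors {1, 3}; in Python terms it
        # corresponds to range(1, 4 + 1, 2).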
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
        )
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
        )
        self.assertEqual(
            set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
            {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
        )

    def testQueryResultSummaries(self):
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")
        # The default test dataset has two collections, each with both flats
        # and biases. Add a new collection with only biases.
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
        # First query yields two results, and involves no postprocessing.
        query1 = registry.queryDataIds(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # Second query should yield no results, which we should see when
        # we attempt to expand the data ID.
        query2 = registry.queryDataIds(["physical_filter"], band="h")
        # There's no execute=False, exact=False test here because the
        # behavior is not something we want to guarantee in this case (and
        # exact=False says either answer is legal).
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertTrue(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        base_query = registry.queryDataIds(["detector", "physical_filter"])
        queries_and_snippets = [
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDatasets("nonexistent", collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                registry.queryDatasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=registry.dimensions,
                        storageClass="Image",
                    ),
                    collections=...,
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                registry.queryDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No datasets of this type in this collection (here via
                # findDatasets on an existing data ID query).
                base_query.findDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching the pattern at all.
                registry.queryDatasets("flat", collections=re.compile("potato.+")),
                ["potato"],
            ),
        ]
        with self.assertRaises(MissingDatasetTypeError):
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
        with self.assertRaises(MissingDatasetTypeError):
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...).any()
        with self.assertRaises(DatasetTypeExpressionError):
            # queryDimensionRecords does not allow dataset type wildcards.
            registry.queryDimensionRecords("detector", datasets=["f*"], collections=...).any()
        for query, snippets in queries_and_snippets:
            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # We want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets) for message in query.explain_no_results()
                ),
                messages,
            )

        # Wildcards on dataset types are not permitted in queryDataIds.
        with self.assertRaises(DatasetTypeExpressionError):
            registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

        # These queries yield no results due to problems that can be
        # identified by cheap follow-up queries, yielding helpful diagnostics.
        if self.supportsDetailedQueryExplain:
            for query, snippets in [
                (
                    # No records for one of the involved dimensions.
                    registry.queryDataIds(["subfilter"]),
                    ["no rows", "subfilter"],
                ),
                (
                    # No records for one of the involved dimensions.
                    registry.queryDimensionRecords("subfilter"),
                    ["no rows", "subfilter"],
                ),
            ]:
                self.assertFalse(query.any(execute=True, exact=False))
                self.assertFalse(query.any(execute=True, exact=True))
                self.assertEqual(query.count(exact=True), 0)
                messages = list(query.explain_no_results())
                self.assertTrue(messages)
                # We want all expected snippets to appear in at least one
                # message.
                self.assertTrue(
                    any(
                        all(snippet in message for snippet in snippets)
                        for message in query.explain_no_results()
                    ),
                    messages,
                )

        # This query yields four overlaps in the database, but one is filtered
        # out in postprocessing. The count queries aren't accurate because
        # they don't account for duplication that happens due to an internal
        # join against commonSkyPix.
        query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=registry.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing. The count queries again aren't very useful.
        # We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary. That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = registry.queryDataIds(
            ["visit", "tract"],
            instrument="Cam1",
            skymap="SkyMap1",
            where="visit=1 AND detector=1 AND tract=0 AND patch=4",
        )
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True, discard=True), 0)
        messages = query4.explain_no_results()
        self.assertTrue(messages)
        self.assertTrue(any("overlap" in message for message in messages))
        # This query should yield results from one dataset type but not the
        # other, which is not registered.
        query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
        self.assertTrue(set(query5))
        self.assertTrue(query5.any(execute=False, exact=False))
        self.assertTrue(query5.any(execute=True, exact=False))
        self.assertTrue(query5.any(execute=True, exact=True))
        self.assertGreaterEqual(query5.count(exact=False), 1)
        self.assertGreaterEqual(query5.count(exact=True), 1)
        self.assertFalse(list(query5.explain_no_results()))
        # This query applies a selection that yields no results, fully in the
        # database. Explaining why it fails involves traversing the relation
        # tree and running a LIMIT 1 query at each level that has the
        # potential to remove rows.
        query6 = registry.queryDimensionRecords(
            "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
        )
        self.assertEqual(query6.count(exact=True), 0)
        self.assertFalse(query6.any())
        if self.supportsDetailedQueryExplain:
            messages = query6.explain_no_results()
            self.assertTrue(messages)
            self.assertTrue(any("no-purpose" in message for message in messages))

    def testQueryDataIdsExpressionError(self):
        """Test error checking of 'where' expressions in queryDataIds."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
        with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
            registry.queryDataIds(["detector"], where="foo.bar = 12")
        with self.assertRaisesRegex(
            LookupError, "Dimension element name cannot be inferred in this context."
        ):
            registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)

    def testQueryDataIdsOrderBy(self):
        """Test order_by and limit on result returned by queryDataIds()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
            return registry.queryDataIds(
                dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
            )

        Test = namedtuple(
            "testQueryDataIdsOrderByTest",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )
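        # namedtuple defaults apply to the rightmost fields, so the three
        # None defaults above cover limit, datasets, and collections.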

        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test(
                "tract.id,visit.id",
                "tract,visit",
                ((0, 1), (0, 1), (0, 2)),
                limit=(3,),
            ),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test(
                "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
            ),
            Test(
                "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
            ),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test(
                "tract,-visit.timespan.begin,visit.timespan.end",
                "tract,visit",
                ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
            ),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test(
                "tract,detector",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.full_name",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
            Test(
                "tract,detector.raft,detector.name_in_raft",
                "tract,detector",
                ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                datasets="flat",
                collections="imported_r",
            ),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # Materializing a query with order_by/limit applied is not
            # supported, so this should raise.
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with self.assertRaises(RelationalAlgebraError):
                with query.materialize():
                    pass

        # Errors in an order_by name.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(
            ValueError,
            r"Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
            r"qualify timespan with specific dimension name\.",
        ):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(
            ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
        ):
            list(do_query("tract").order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query("tract").order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query("tract").order_by("tract.name"))

        with self.assertRaisesRegex(
            ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
        ):
            list(do_query("visit").order_by("timestamp.begin"))

    def testQueryDataIdsGovernorExceptions(self):
        """Test exceptions raised by queryDataIds() for incorrect governors."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
            return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)

        Test = namedtuple(
            "testQueryDataIdExceptionsTest",
            ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
            defaults=(None, None, None, {}, None, 0),
        )

        test_data = (
            Test("tract,visit", count=6),
            Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
            ),
            Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
            Test(
                "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
            ),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
            Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam1", "map": "SkyMap1"},
                count=6,
            ),
            Test(
                "tract,visit",
                where="instrument=cam AND skymap=map",
                bind={"cam": "Cam", "map": "SkyMap"},
                exception=DataIdValueError,
            ),
        )

        for test in test_data:
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                self.assertEqual(query.count(discard=True), test.count)

            # Repeat the same checks on a materialized query.
            if test.exception:
                with self.assertRaises(test.exception):
                    query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                    with query.materialize() as materialized:
                        materialized.count(discard=True)
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)

    def testQueryDimensionRecordsOrderBy(self):
        """Test order_by and limit on result returned by
        queryDimensionRecords().
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(element, datasets=None, collections=None):
            return registry.queryDimensionRecords(
                element, instrument="Cam1", datasets=datasets, collections=collections
            )

        query = do_query("detector")
        self.assertEqual(len(list(query)), 4)

        Test = namedtuple(
            "testQueryDataIdsOrderByTest",
            ("element", "order_by", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        test_data = [
            Test("detector", "detector", (1, 2, 3, 4)),
            Test("detector", "-detector", (4, 3, 2, 1)),
            Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
            Test("detector", "-detector.purpose", (4,), limit=(1,)),
            Test("visit", "visit", (1, 2)),
            Test("visit", "-visit.id", (2, 1)),
            Test("visit", "zenith_angle", (1, 2)),
            Test("visit", "-visit.name", (2, 1)),
            Test("visit", "day_obs,-visit.timespan.begin", (2, 1)),
        ]
        if self.supportsQueryOffset:
            test_data.append(Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)))

        for test in test_data:
            order_by = test.order_by.split(",")
            query = do_query(test.element).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, test.result)

        # Errors in an order_by name.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(
                (ValueError, InvalidQueryError),
                "(Empty dimension name in ORDER BY)|(Unrecognized identifier)",
            ):
                list(do_query("detector").order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(
                (ValueError, InvalidQueryError),
                "(Element name mismatch: 'undimension')|(Unrecognized identifier)",
            ):
                list(do_query("detector").order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(
                (ValueError, InvalidQueryError),
                "(Field 'attract' does not exist in 'detector'.)|(Unrecognized identifier)",
            ):
                list(do_query("detector").order_by(order_by))

        for order_by in ("timestamp.begin", "-timestamp.begin"):
            with self.assertRaisesRegex(
                (ValueError, InvalidQueryError),
                r"(Element name mismatch: 'timestamp' instead of 'visit'; "
                r"perhaps you meant 'timespan.begin'\?)"
                r"|(Unrecognized identifier)",
            ):
                list(do_query("visit").order_by(order_by))

    def testQueryDimensionRecordsExceptions(self):
        """Test exceptions raised by queryDimensionRecords()."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        result = registry.queryDimensionRecords("detector")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", instrument="Cam1")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
        self.assertEqual(result.count(), 4)

        # Test that values specified in kwargs override those specified in
        # dataId.
        result = registry.queryDimensionRecords(
            "detector", dataId={"instrument": "NotCam1"}, instrument="Cam1"
        )
        self.assertEqual(result.count(), 4)

        result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
        self.assertEqual(result.count(), 4)
        result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
        self.assertTrue(result.any())
        self.assertEqual(result.count(), 4)

        if self.supportsQueryGovernorValidation:
            with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
                result = registry.queryDimensionRecords("detector", instrument="NotCam1")
                result.count()

            with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
                result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
                result.count()

            with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
                result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
                result.count()

            with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
                result = registry.queryDimensionRecords(
                    "detector", where="instrument=instr", bind={"instr": "NotCam1"}
                )
                result.count()

    def testDatasetConstrainedDimensionRecordQueries(self):
        """Test that queryDimensionRecords works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Query for physical_filter dimension records, using a dataset type
        # that has both physical_filter and detector dimensions.
        records = registry.queryDimensionRecords(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
        # Trying to constrain by all dataset types is an error.
        with self.assertRaises(TypeError):
            list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))

    def testSkyPixDatasetQueries(self):
        """Test that we can build queries involving skypix dimensions as long
        as a dataset type that uses those dimensions is included.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dataset_type = DatasetType(
            "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
        )
        registry.registerDatasetType(dataset_type)
        run = "r"
        registry.registerRun(run)
        # First try queries where there are no datasets; the concern is
        # whether we can even build and execute these queries without
        # raising, even when "doomed" query shortcuts are in play.
        self.assertFalse(
            list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
        )
        self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
        # Now add a dataset and see that we can get it back.
        htm7 = registry.dimensions.skypix["htm"][7].pixelization
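        # universe() returns a sphgeom RangeSet covering the full sky; taking
        # [0][0] grabs the first pixel index of its first range, which is just
        # an arbitrary valid htm7 ID at which to insert a dataset.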
        data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
        (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
        self.assertEqual(
            set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
            {data_id},
        )
        self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})

    def testDatasetIdFactory(self):
        """Simple test for DatasetIdFactory, mostly to catch potential changes
        in its API.
        """
        registry = self.makeRegistry()
        factory = DatasetIdFactory()
        dataset_type = DatasetType(
            "datasetType",
            dimensions=["detector", "instrument"],
            universe=registry.dimensions,
            storageClass="int",
        )
        run = "run"
        data_id = DataCoordinate.standardize(
            instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
        )

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 4)

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 5)
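
        # Version-5 UUIDs are name-based and therefore deterministic: the same
        # run/type/data ID inputs should reproduce the same ID. A quick sketch
        # of that property (not part of the original API check):
        self.assertEqual(
            datasetId, factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
        )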

        datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        self.assertIsInstance(datasetId, uuid.UUID)
        self.assertEqual(datasetId.version, 5)

    def testExposureQueries(self):
        """Test query methods using arguments sourced from the exposure log
        service.

        The most complete test dataset currently available to daf_butler tests
        is the hsc-rc2-subset.yaml export (which is unfortunately distinct
        from the lsst/rc2_subset GitHub repo), but it does not have 'exposure'
        dimension records, as it was focused on providing nontrivial spatial
        overlaps between visit+detector and tract+patch. So in this test we
        need to translate queries that originally used the exposure dimension
        to use the (very similar) visit dimension instead.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        self.assertEqual(
            [
                record.id
                for record in registry.queryDimensionRecords("visit", instrument="HSC")
                .order_by("visit")
                .limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                data_id["visit"]
                for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
            ],
            [318, 322, 326, 330, 332],
        )
        self.assertEqual(
            [
                record.id
                for record in registry.queryDimensionRecords("detector", instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )
        self.assertEqual(
            [
                data_id["detector"]
                for data_id in registry.queryDataIds(["detector"], instrument="HSC")
                .order_by("full_name")
                .limit(5)
            ],
            [73, 72, 71, 70, 65],
        )

    def test_long_query_names(self) -> None:
        """Test that queries involving very long names are handled correctly.

        This is especially important for PostgreSQL, which truncates
        identifiers longer than 63 characters, but it's worth testing for all
        DBs.
        """
        registry = self.makeRegistry()
        name = "abcd" * 17
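        # 4 * 17 = 68 characters, safely past PostgreSQL's truncation
        # threshold.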
        registry.registerDatasetType(
            DatasetType(
                name,
                dimensions=(),
                storageClass="Exposure",
                universe=registry.dimensions,
            )
        )
        # We need to search more than one collection that actually contains a
        # matching dataset, to avoid optimizations that sidestep truncation
        # bugs by making findFirst=True a no-op.
        run1 = "run1"
        registry.registerRun(run1)
        run2 = "run2"
        registry.registerRun(run2)
        (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
        registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
        self.assertEqual(
            set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
            {ref1},
        )

    def test_skypix_constraint_queries(self) -> None:
        """Test queries spatially constrained by a skypix data ID."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in registry.queryDataIds(["patch"]).expanded()
        }
        skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
        # This check ensures the test doesn't become trivial due to a config
        # change; if it does, just pick a different HTM level.
        self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
        # Gather all skypix IDs that definitely overlap at least one of these
        # patches.
        relevant_skypix_ids = lsst.sphgeom.RangeSet()
        for patch_region in patch_regions.values():
            relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
        # Look for a "nontrivial" skypix_id that overlaps at least one patch
        # and does not overlap at least one other patch.
        for skypix_id in itertools.chain.from_iterable(
            range(begin, end) for begin, end in relevant_skypix_ids
        ):
            skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
            overlapping_patches = {
                patch_key
                for patch_key, patch_region in patch_regions.items()
                if not patch_region.isDisjointFrom(skypix_region)
            }
            if overlapping_patches and overlapping_patches != patch_regions.keys():
                break
        else:
            raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    dataId={skypix_dimension.name: skypix_id},
                )
            },
            overlapping_patches,
        )
        # Test that a three-way join that includes the common skypix system in
        # the dimensions doesn't generate redundant join terms in the query.
        full_data_ids = set(
            registry.queryDataIds(
                ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
            ).expanded()
        )
        self.assertGreater(len(full_data_ids), 0)
        for data_id in full_data_ids:
            self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
            self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))

    def test_spatial_constraint_queries(self) -> None:
        """Test queries in which one spatial dimension in the constraint (data
        ID or ``where`` string) constrains a different spatial dimension in
        the query result columns.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        patch_regions = {
            (data_id["tract"], data_id["patch"]): data_id.region
            for data_id in registry.queryDataIds(["patch"]).expanded()
        }
        observation_regions = {
            (data_id["visit"], data_id["detector"]): data_id.region
            for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
        }
        all_combos = {
            (patch_key, observation_key)
            for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
        }
        overlapping_combos = {
            (patch_key, observation_key)
            for patch_key, observation_key in all_combos
            if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
        }
        # Check a direct spatial join with no constraint first.
        self.assertEqual(
            {
                ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
                for data_id in registry.queryDataIds(["patch", "visit", "detector"])
            },
            overlapping_combos,
        )
        overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
        overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
        for patch_key, observation_key in overlapping_combos:
            overlaps_by_patch[patch_key].add(observation_key)
            overlaps_by_observation[observation_key].add(patch_key)
        # Find patches and observations that overlap at least one of the
        # other, but not all of the other.
        nontrivial_patch = next(
            iter(
                patch_key
                for patch_key, observation_keys in overlaps_by_patch.items()
                if observation_keys and observation_keys != observation_regions.keys()
            )
        )
        nontrivial_observation = next(
            iter(
                observation_key
                for observation_key, patch_keys in overlaps_by_observation.items()
                if patch_keys and patch_keys != patch_regions.keys()
            )
        )
        # Use the nontrivial patches and observations as constraints on the
        # other dimensions in various ways, first via a 'where' expression.
        # It's better in general to use 'bind' instead of f-strings, but these
        # are all integers so there are no quoting concerns.
        self.assertEqual(
            {
                (data_id["visit"], data_id["detector"])
                for data_id in registry.queryDataIds(
                    ["visit", "detector"],
                    where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
                    skymap="hsc_rings_v1",
                )
            },
            overlaps_by_patch[nontrivial_patch],
        )
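        # For comparison, the same constraint expressed with 'bind' instead of
        # an f-string would look like this (a sketch only, not executed):
        #
        #     registry.queryDataIds(
        #         ["visit", "detector"],
        #         where="tract = t AND patch = p",
        #         bind={"t": nontrivial_patch[0], "p": nontrivial_patch[1]},
        #         skymap="hsc_rings_v1",
        #     )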
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
                    instrument="HSC",
                )
            },
            overlaps_by_observation[nontrivial_observation],
        )
        # and then via the dataId argument.
        self.assertEqual(
            {
                (data_id["visit"], data_id["detector"])
                for data_id in registry.queryDataIds(
                    ["visit", "detector"],
                    dataId={
                        "tract": nontrivial_patch[0],
                        "patch": nontrivial_patch[1],
                    },
                    skymap="hsc_rings_v1",
                )
            },
            overlaps_by_patch[nontrivial_patch],
        )
        self.assertEqual(
            {
                (data_id["tract"], data_id["patch"])
                for data_id in registry.queryDataIds(
                    ["patch"],
                    dataId={
                        "visit": nontrivial_observation[0],
                        "detector": nontrivial_observation[1],
                    },
                    instrument="HSC",
                )
            },
            overlaps_by_observation[nontrivial_observation],
        )

    def test_query_projection_drop_postprocessing(self) -> None:
        """Test that projections and deduplications on query objects can
        drop post-query region filtering to ensure the query remains in
        the SQL engine.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")

        def pop_transfer(tree: Relation) -> Relation:
            """If a relation tree terminates with a transfer to a new engine,
            return the relation prior to that transfer. If not, return the
            original relation.

            Parameters
            ----------
            tree : `Relation`
                The relation tree to modify.
            """
            match tree:
                case Transfer(target=target):
                    return target
                case _:
                    return tree
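
        # (The structural pattern matching above requires Python 3.10 or
        # newer, which this package's minimum Python version already
        # satisfies.)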

        # There's no public way to get a Query object yet, so we get one from
        # a DataCoordinateQueryResults private attribute. When a public API
        # is available, this test should use it.
        query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
        # We expect this query to terminate in the iteration engine
        # originally, because region-filtering is necessary.
        self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
        # If we deduplicate, we usually have to do that downstream of the
        # filtering. That means the deduplication has to happen in the
        # iteration engine.
        self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
        # If we pass drop_postprocessing, we instead drop the region filtering
        # so the deduplication can happen in SQL (though there might still be
        # a transfer to iteration at the tail of the tree that we can ignore;
        # that's what pop_transfer takes care of here).
        self.assertIsInstance(
            pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
            sql.Engine,
        )

    def test_query_find_datasets_drop_postprocessing(self) -> None:
        """Test that DataCoordinateQueryResults.findDatasets avoids commutator
        problems with the FindFirstDataset relation operation.
        """
        # Setup: load some visit, tract, and patch records, and insert two
        # datasets with dimensions {visit, patch}, with one in each of two
        # RUN collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")
        storage_class = StorageClass("Warpy")
        registry.storageClasses.registerStorageClass(storage_class)
        dataset_type = DatasetType(
            "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
        )
        registry.registerDatasetType(dataset_type)
        (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
        registry.registerRun("run1")
        registry.registerRun("run2")
        (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
        (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
        # Query for the dataset using queryDataIds(...).findDatasets(...)
        # against only one of the two collections. This should work even
        # though the relation returned by queryDataIds ends with
        # iteration-engine region-filtering, because we can recognize before
        # running the query that there is only one collection to search and
        # hence the (default) findFirst=True is irrelevant, and joining in the
        # dataset query commutes past the iteration-engine postprocessing.
        query1 = registry.queryDataIds(
            {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
        )
        self.assertEqual(
            set(query1.findDatasets(dataset_type.name, collections=["run1"])),
            {ref1},
        )
        # Query for the dataset using queryDataIds(...).findDatasets(...)
        # against both collections. This can only work if the FindFirstDataset
        # operation can be commuted past the iteration-engine operations into
        # SQL.
        query2 = registry.queryDataIds(
            {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
        )
        self.assertEqual(
            set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
            {ref2},
        )

    def test_query_empty_collections(self) -> None:
        """Test registry query methods with empty collections. The methods
        should return an empty result set (or None where applicable) and
        provide "doomed" diagnostics.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        # Tests for registry.findDataset()
        with self.assertRaises(NoDefaultCollectionError):
            registry.findDataset("bias", instrument="Cam1", detector=1)
        self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
        self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))

        # Tests for registry.queryDatasets()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDatasets("bias")
        self.assertTrue(list(registry.queryDatasets("bias", collections=...)))

        result = registry.queryDatasets("bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

        # Tests for registry.queryDataIds()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDataIds("detector", datasets="bias")
        self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))

        result = registry.queryDataIds("detector", datasets="bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

        # Tests for registry.queryDimensionRecords()
        with self.assertRaises(NoDefaultCollectionError):
            registry.queryDimensionRecords("detector", datasets="bias")
        self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))

        result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
        self.assertEqual(len(list(result)), 0)
        messages = list(result.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(any("because collection list is empty" in message for message in messages))

    def test_dataset_followup_spatial_joins(self) -> None:
        """Test queryDataIds(...).findRelatedDatasets(...) where a spatial
        join is involved.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "spatial.yaml")
        pvi_dataset_type = DatasetType(
            "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
        )
        registry.registerDatasetType(pvi_dataset_type)
        collection = "datasets"
        registry.registerRun(collection)
        (pvi1,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
        )
        (pvi2,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
        )
        (pvi3,) = registry.insertDatasets(
            pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
        )
        self.assertEqual(
            set(
                registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
                .expanded()
                .findRelatedDatasets("pvi", [collection])
            ),
            {
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
                (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
            },
        )