# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._named import NamedValueSet
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, DimensionGraph, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
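
    # A minimal sketch (hypothetical subclass name; the manager path is an
    # assumption, not taken from this file) of how a concrete test case might
    # use these hooks:
    #
    #     class NameKeyCollectionsRegistryTests(RegistryTests):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
    #         )
    #
    # makeRegistryConfig then injects that value under ("managers",
    # "collections") in the returned RegistryConfig.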

    @abstractmethod
    def makeRegistry(self, share_repo_with: SqlRegistry | None = None) -> SqlRegistry | None:
        """Return the SqlRegistry instance to be tested.

        Parameters
        ----------
        share_repo_with : `SqlRegistry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `SqlRegistry`
            New `SqlRegistry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: SqlRegistry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
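
    # Typical usage, taken from the tests below:
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")
    #     self.loadData(registry, "datasets.yaml")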

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
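        # count() and any() may take different code paths from full iteration
        # in lazy results objects, so exercise them explicitly in both the
        # empty and non-empty cases below.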
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert.
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True.
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work.
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail.
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure.
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral.
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
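        # (np.int64 is integer-like but not a Python int; expandDataId is
        # expected to coerce it, as the assertions below verify.)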
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises.
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None.
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, where two have the right dataset
        # type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
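        # (With "Cam1/calib" skipped, "imported_g" is the only remaining
        # collection here containing a matching bias, so both orderings
        # below return bias1.)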
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes.
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error.
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
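        # (As exercised below: DATAID_TYPE derives a deterministic version-5
        # UUID from the dataset type and data ID, independent of the run,
        # while DATAID_TYPE_RUN also folds the run name into the hash.)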
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK.
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run.
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run.
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured, since none is
            # expected from the query itself.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # With two input datasets.
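        # (Exposures 100 and 101 were inserted into both run1 and tagged2
        # above, so the union of the two collections can yield duplicate data
        # IDs; the set() in the next assertion de-duplicates them.)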
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the requested
        # dimensions but is a part of the full expression, so it should work
        # too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types.
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)
1296 def testAbstractQuery(self):
1297 """Test that we can run a query that just lists the known
1298 bands. This is tricky because band is
1299 backed by a query against physical_filter.
1300 """
1301 registry = self.makeRegistry()
1302 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1303 registry.insertDimensionData(
1304 "physical_filter",
1305 dict(instrument="DummyCam", name="dummy_i", band="i"),
1306 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1307 dict(instrument="DummyCam", name="dummy_r", band="r"),
1308 )
1309 rows = registry.queryDataIds(["band"]).toSet()
1310 self.assertCountEqual(
1311 rows,
1312 [
1313 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1314 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1315 ],
1316 )
1318 def testAttributeManager(self):
1319 """Test basic functionality of attribute manager."""
1320 # Number of attributes with schema versions in a fresh database:
1321 # 6 managers with 2 records per manager, plus config for dimensions.
1322 VERSION_COUNT = 6 * 2 + 1
1324 registry = self.makeRegistry()
1325 attributes = registry._managers.attributes
1327 # check what get() returns for non-existing key
1328 self.assertIsNone(attributes.get("attr"))
1329 self.assertEqual(attributes.get("attr", ""), "")
1330 self.assertEqual(attributes.get("attr", "Value"), "Value")
1331 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1333 # cannot store empty key or value
1334 with self.assertRaises(ValueError):
1335 attributes.set("", "value")
1336 with self.assertRaises(ValueError):
1337 attributes.set("attr", "")
1339 # set value of non-existing key
1340 attributes.set("attr", "value")
1341 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1342 self.assertEqual(attributes.get("attr"), "value")
1344 # update value of existing key
1345 with self.assertRaises(ButlerAttributeExistsError):
1346 attributes.set("attr", "value2")
1348 attributes.set("attr", "value2", force=True)
1349 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1350 self.assertEqual(attributes.get("attr"), "value2")
1352 # delete existing key
1353 self.assertTrue(attributes.delete("attr"))
1354 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1356 # delete non-existing key
1357 self.assertFalse(attributes.delete("non-attr"))
1359 # store a bunch of keys and get the list back
1360 data = [
1361 ("version.core", "1.2.3"),
1362 ("version.dimensions", "3.2.1"),
1363 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1364 ]
1365 for key, value in data:
1366 attributes.set(key, value)
1367 items = dict(attributes.items())
1368 for key, value in data:
1369 self.assertEqual(items[key], value)
1371 def testQueryDatasetsDeduplication(self):
1372 """Test that the findFirst option to queryDatasets selects datasets
1373 from collections in the order given.
1374 """
1375 registry = self.makeRegistry()
1376 self.loadData(registry, "base.yaml")
1377 self.loadData(registry, "datasets.yaml")
1378 self.assertCountEqual(
1379 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1380 [
1381 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1382 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1383 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1384 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1385 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1386 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1387 ],
1388 )
1389 self.assertCountEqual(
1390 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1391 [
1392 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1393 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1394 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1395 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1396 ],
1397 )
1398 self.assertCountEqual(
1399 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1400 [
1401 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1402 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1403 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1404 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1405 ],
1406 )
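# Find-first resolution in miniature: a pure-Python sketch (not the
# registry implementation) of the rule the assertions above encode - for
# each data ID, take the dataset from the first collection in the search
# path that has one.
#
#     def find_first(search_path, datasets_by_collection):
#         result = {}
#         for collection in search_path:
#             for data_id, ref in datasets_by_collection[collection].items():
#                 result.setdefault(data_id, ref)
#         return result
#
# With detectors 1-3 in "imported_g" and 2-4 in "imported_r", the path
# ["imported_r", "imported_g"] yields detector 1 from "imported_g" and
# detectors 2-4 from "imported_r", matching the last assertion above.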
1408 def testQueryResults(self):
1409 """Test querying for data IDs and then manipulating the QueryResults
1410 object returned to perform other queries.
1411 """
1412 registry = self.makeRegistry()
1413 self.loadData(registry, "base.yaml")
1414 self.loadData(registry, "datasets.yaml")
1415 bias = registry.getDatasetType("bias")
1416 flat = registry.getDatasetType("flat")
1417 # Obtain expected results from methods other than those we're testing
1418 # here. That includes:
1419 # - the dimensions of the data IDs we want to query:
1420 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1421 # - the dimensions of some other data IDs we'll extract from that:
1422 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1423 # - the data IDs we expect to obtain from the first queries:
1424 expectedDataIds = DataCoordinateSet(
1425 {
1426 DataCoordinate.standardize(
1427 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1428 )
1429 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1430 },
1431 graph=expectedGraph,
1432 hasFull=False,
1433 hasRecords=False,
1434 )
1435 # - the flat datasets we expect to find from those data IDs, in just
1436 # one collection (so deduplication is irrelevant):
1437 expectedFlats = [
1438 registry.findDataset(
1439 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1440 ),
1441 registry.findDataset(
1442 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1443 ),
1444 registry.findDataset(
1445 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1446 ),
1447 ]
1448 # - the data IDs we expect to extract from that:
1449 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1450 # - the bias datasets we expect to find from those data IDs, after we
1451 # subset out the physical_filter dimension, first with duplicates:
1452 expectedAllBiases = [
1453 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1454 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1455 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1456 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1457 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1458 ]
1459 # - ...and without duplicates:
1460 expectedDeduplicatedBiases = [
1461 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1462 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1463 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1464 ]
1465 # Test against those expected results, using a "lazy" query for the
1466 # data IDs (which re-executes that query each time we use it to do
1467 # something new).
1468 dataIds = registry.queryDataIds(
1469 ["detector", "physical_filter"],
1470 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1471 instrument="Cam1",
1472 )
1473 self.assertEqual(dataIds.graph, expectedGraph)
1474 self.assertEqual(dataIds.toSet(), expectedDataIds)
1475 self.assertCountEqual(
1476 list(
1477 dataIds.findDatasets(
1478 flat,
1479 collections=["imported_r"],
1480 )
1481 ),
1482 expectedFlats,
1483 )
1484 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1485 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1486 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1487 self.assertCountEqual(
1488 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1489 expectedAllBiases,
1490 )
1491 self.assertCountEqual(
1492 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1493 expectedDeduplicatedBiases,
1494 )
1496 # Searching for a dataset with dimensions we had projected away
1497 # restores those dimensions.
1498 self.assertCountEqual(
1499 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1500 expectedFlats,
1501 )
1503 # Use a component dataset type.
1504 self.assertCountEqual(
1505 [
1506 ref.makeComponentRef("image")
1507 for ref in subsetDataIds.findDatasets(
1508 bias,
1509 collections=["imported_r", "imported_g"],
1510 findFirst=False,
1511 )
1512 ],
1513 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1514 )
1516 # Use a named dataset type that does not exist and a dataset type
1517 # object that does not exist.
1518 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1520 # Test both string name and dataset type object.
1521 test_type: str | DatasetType
1522 for test_type, test_type_name in (
1523 (unknown_type, unknown_type.name),
1524 (unknown_type.name, unknown_type.name),
1525 ):
1526 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1527 list(
1528 subsetDataIds.findDatasets(
1529 test_type, collections=["imported_r", "imported_g"], findFirst=True
1530 )
1531 )
1533 # Materialize the bias dataset queries (only) by putting the results
1534 # into temporary tables, then repeat those tests.
1535 with subsetDataIds.findDatasets(
1536 bias, collections=["imported_r", "imported_g"], findFirst=False
1537 ).materialize() as biases:
1538 self.assertCountEqual(list(biases), expectedAllBiases)
1539 with subsetDataIds.findDatasets(
1540 bias, collections=["imported_r", "imported_g"], findFirst=True
1541 ).materialize() as biases:
1542 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1543 # Materialize the data ID subset query, but not the dataset queries.
1544 with subsetDataIds.materialize() as subsetDataIds:
1545 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1546 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1547 self.assertCountEqual(
1548 list(
1549 subsetDataIds.findDatasets(
1550 bias, collections=["imported_r", "imported_g"], findFirst=False
1551 )
1552 ),
1553 expectedAllBiases,
1554 )
1555 self.assertCountEqual(
1556 list(
1557 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1558 ),
1559 expectedDeduplicatedBiases,
1560 )
1561 # Materialize the dataset queries, too.
1562 with subsetDataIds.findDatasets(
1563 bias, collections=["imported_r", "imported_g"], findFirst=False
1564 ).materialize() as biases:
1565 self.assertCountEqual(list(biases), expectedAllBiases)
1566 with subsetDataIds.findDatasets(
1567 bias, collections=["imported_r", "imported_g"], findFirst=True
1568 ).materialize() as biases:
1569 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1570 # Materialize the original query, but none of the follow-up queries.
1571 with dataIds.materialize() as dataIds:
1572 self.assertEqual(dataIds.graph, expectedGraph)
1573 self.assertEqual(dataIds.toSet(), expectedDataIds)
1574 self.assertCountEqual(
1575 list(
1576 dataIds.findDatasets(
1577 flat,
1578 collections=["imported_r"],
1579 )
1580 ),
1581 expectedFlats,
1582 )
1583 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1584 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1585 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1586 self.assertCountEqual(
1587 list(
1588 subsetDataIds.findDatasets(
1589 bias, collections=["imported_r", "imported_g"], findFirst=False
1590 )
1591 ),
1592 expectedAllBiases,
1593 )
1594 self.assertCountEqual(
1595 list(
1596 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1597 ),
1598 expectedDeduplicatedBiases,
1599 )
1600 # Materialize just the bias dataset queries.
1601 with subsetDataIds.findDatasets(
1602 bias, collections=["imported_r", "imported_g"], findFirst=False
1603 ).materialize() as biases:
1604 self.assertCountEqual(list(biases), expectedAllBiases)
1605 with subsetDataIds.findDatasets(
1606 bias, collections=["imported_r", "imported_g"], findFirst=True
1607 ).materialize() as biases:
1608 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1609 # Materialize the subset data ID query, but not the dataset
1610 # queries.
1611 with subsetDataIds.materialize() as subsetDataIds:
1612 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1613 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1614 self.assertCountEqual(
1615 list(
1616 subsetDataIds.findDatasets(
1617 bias, collections=["imported_r", "imported_g"], findFirst=False
1618 )
1619 ),
1620 expectedAllBiases,
1621 )
1622 self.assertCountEqual(
1623 list(
1624 subsetDataIds.findDatasets(
1625 bias, collections=["imported_r", "imported_g"], findFirst=True
1626 )
1627 ),
1628 expectedDeduplicatedBiases,
1629 )
1630 # Materialize the bias dataset queries, too, so now we're
1631 # materializing every single step.
1632 with subsetDataIds.findDatasets(
1633 bias, collections=["imported_r", "imported_g"], findFirst=False
1634 ).materialize() as biases:
1635 self.assertCountEqual(list(biases), expectedAllBiases)
1636 with subsetDataIds.findDatasets(
1637 bias, collections=["imported_r", "imported_g"], findFirst=True
1638 ).materialize() as biases:
1639 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
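# The lazy-vs-materialized pattern above, in miniature (an illustrative
# sketch assuming the same registry and test data as this method):
#
#     data_ids = registry.queryDataIds(["detector"], instrument="Cam1")
#     list(data_ids)  # lazy: executes the underlying query
#     with data_ids.materialize() as frozen:
#         # "frozen" is backed by a temporary table, so follow-up calls
#         # like frozen.findDatasets(...) no longer re-run the original
#         # data ID query.
#         list(frozen)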
1641 def testStorageClassPropagation(self):
1642 """Test that queries for datasets respect the storage class passed in
1643 as part of a full dataset type.
1644 """
1645 registry = self.makeRegistry()
1646 self.loadData(registry, "base.yaml")
1647 dataset_type_in_registry = DatasetType(
1648 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1649 )
1650 registry.registerDatasetType(dataset_type_in_registry)
1651 run = "run1"
1652 registry.registerRun(run)
1653 (inserted_ref,) = registry.insertDatasets(
1654 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1655 )
1656 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1657 query_dataset_type = DatasetType(
1658 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1659 )
1660 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1661 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1662 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1663 (query_datasets_ref,) = query_datasets_result
1664 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1665 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1666 query_dataset_type, collections=[run]
1667 )
1668 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1669 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1670 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1671 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1672 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1673 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1674 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1676 def testEmptyDimensionsQueries(self):
1677 """Test Query and QueryResults objects in the case where there are no
1678 dimensions.
1679 """
1680 # Set up test data: one dataset type, two runs, one dataset in each.
1681 registry = self.makeRegistry()
1682 self.loadData(registry, "base.yaml")
1683 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1684 registry.registerDatasetType(schema)
1685 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1686 run1 = "run1"
1687 run2 = "run2"
1688 registry.registerRun(run1)
1689 registry.registerRun(run2)
1690 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1691 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1692 # Query directly for both of the datasets together, then for each one individually.
1693 self.checkQueryResults(
1694 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1695 )
1696 self.checkQueryResults(
1697 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1698 [dataset1],
1699 )
1700 self.checkQueryResults(
1701 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1702 [dataset2],
1703 )
1704 # Query for data IDs with no dimensions.
1705 dataIds = registry.queryDataIds([])
1706 self.checkQueryResults(dataIds, [dataId])
1707 # Use queried data IDs to find the datasets.
1708 self.checkQueryResults(
1709 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1710 [dataset1, dataset2],
1711 )
1712 self.checkQueryResults(
1713 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1714 [dataset1],
1715 )
1716 self.checkQueryResults(
1717 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1718 [dataset2],
1719 )
1720 # Now materialize the data ID query results and repeat those tests.
1721 with dataIds.materialize() as dataIds:
1722 self.checkQueryResults(dataIds, [dataId])
1723 self.checkQueryResults(
1724 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1725 [dataset1],
1726 )
1727 self.checkQueryResults(
1728 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1729 [dataset2],
1730 )
1731 # Query for non-empty data IDs, then subset that to get the empty one.
1732 # Repeat the above tests starting from that.
1733 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1734 self.checkQueryResults(dataIds, [dataId])
1735 self.checkQueryResults(
1736 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1737 [dataset1, dataset2],
1738 )
1739 self.checkQueryResults(
1740 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1741 [dataset1],
1742 )
1743 self.checkQueryResults(
1744 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1745 [dataset2],
1746 )
1747 with dataIds.materialize() as dataIds:
1748 self.checkQueryResults(dataIds, [dataId])
1749 self.checkQueryResults(
1750 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1751 [dataset1, dataset2],
1752 )
1753 self.checkQueryResults(
1754 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1755 [dataset1],
1756 )
1757 self.checkQueryResults(
1758 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1759 [dataset2],
1760 )
1761 # Query for non-empty data IDs, then materialize, then subset to get
1762 # the empty one. Repeat again.
1763 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1764 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1765 self.checkQueryResults(dataIds, [dataId])
1766 self.checkQueryResults(
1767 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1768 [dataset1, dataset2],
1769 )
1770 self.checkQueryResults(
1771 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1772 [dataset1],
1773 )
1774 self.checkQueryResults(
1775 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1776 [dataset2],
1777 )
1778 with dataIds.materialize() as dataIds:
1779 self.checkQueryResults(dataIds, [dataId])
1780 self.checkQueryResults(
1781 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1782 [dataset1, dataset2],
1783 )
1784 self.checkQueryResults(
1785 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1786 [dataset1],
1787 )
1788 self.checkQueryResults(
1789 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1790 [dataset2],
1791 )
1792 # Query for non-empty data IDs with a constraint on an empty-data-ID
1793 # dataset that exists.
1794 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1795 self.checkQueryResults(
1796 dataIds.subset(unique=True),
1797 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1798 )
1799 # Again query for non-empty data IDs with a constraint on empty-data-ID
1800 # datasets, but when the datasets don't exist. We delete the existing
1801 # dataset and query just that collection rather than creating a new
1802 # empty collection because this is a bit less likely for our build-time
1803 # logic to shortcut-out (via the collection summaries), and such a
1804 # shortcut would make this test a bit more trivial than we'd like.
1805 registry.removeDatasets([dataset2])
1806 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1807 self.checkQueryResults(dataIds, [])
1809 def testDimensionDataModifications(self):
1810 """Test that modifying dimension records via:
1811 syncDimensionData(..., update=True) and
1812 insertDimensionData(..., replace=True) works as expected, even in the
1813 presence of datasets using those dimensions and spatial overlap
1814 relationships.
1815 """
1817 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1818 """Unpack a sphgeom.RangeSet into the integers it contains."""
1819 for begin, end in ranges:
1820 yield from range(begin, end)
1822 def range_set_hull(
1823 ranges: lsst.sphgeom.RangeSet,
1824 pixelization: lsst.sphgeom.HtmPixelization,
1825 ) -> lsst.sphgeom.ConvexPolygon:
1826 """Create a ConvexPolygon hull of the region defined by a set of
1827 HTM pixelization index ranges.
1828 """
1829 points = []
1830 for index in unpack_range_set(ranges):
1831 points.extend(pixelization.triangle(index).getVertices())
1832 return lsst.sphgeom.ConvexPolygon(points)
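# A quick illustration of those helpers (hedged: it relies on sphgeom's
# HTM convention that trixel N at one level maps to the four children
# [4*N, 4*N + 4) at the next level, which this test also assumes below).
example_ranges = lsst.sphgeom.RangeSet(12288).scaled(4)
assert list(unpack_range_set(example_ranges)) == [49152, 49153, 49154, 49155]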
1834 # Use HTM to set up an initial parent region (one arbitrary trixel)
1835 # and four child regions (the trixels within the parent at the next
1836 # level). We'll use the parent as a tract/visit region and the children
1837 # as its patch/visit_detector regions.
1838 registry = self.makeRegistry()
1839 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1840 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1841 index = 12288
1842 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1843 assert htm6.universe().contains(child_ranges_small)
1844 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1845 parent_region_small = lsst.sphgeom.ConvexPolygon(
1846 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1847 )
1848 assert all(parent_region_small.contains(c) for c in child_regions_small)
1849 # Make a larger version of each child region, defined to be the set of
1850 # htm6 trixels that overlap the original's bounding circle. Make a new
1851 # parent that's the convex hull of the new children.
1852 child_regions_large = [
1853 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1854 ]
1855 assert all(
1856 large.contains(small)
1857 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1858 )
1859 parent_region_large = lsst.sphgeom.ConvexPolygon(
1860 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1861 )
1862 assert all(parent_region_large.contains(c) for c in child_regions_large)
1863 assert parent_region_large.contains(parent_region_small)
1864 assert not parent_region_small.contains(parent_region_large)
1865 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1866 # Find some commonSkyPix indices that overlap the large regions but do not
1867 # overlap the small regions. We use commonSkyPix here to make sure the
1868 # real tests later involve what's in the database, not just post-query
1869 # filtering of regions.
1870 child_difference_indices = []
1871 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1872 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1873 assert difference, "if this is empty, we can't test anything useful with these regions"
1874 assert all(
1875 not commonSkyPix.triangle(d).isDisjointFrom(large)
1876 and commonSkyPix.triangle(d).isDisjointFrom(small)
1877 for d in difference
1878 )
1879 child_difference_indices.append(difference)
1880 parent_difference_indices = list(
1881 unpack_range_set(
1882 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1883 )
1884 )
1885 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1886 assert all(
1887 (
1888 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1889 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1890 )
1891 for d in parent_difference_indices
1892 )
1893 # Now that we've finally got those regions, we'll insert the large ones
1894 # as tract/patch dimension records.
1895 skymap_name = "testing_v1"
1896 registry.insertDimensionData(
1897 "skymap",
1898 {
1899 "name": skymap_name,
1900 "hash": bytes([42]),
1901 "tract_max": 1,
1902 "patch_nx_max": 2,
1903 "patch_ny_max": 2,
1904 },
1905 )
1906 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1907 registry.insertDimensionData(
1908 "patch",
1909 *[
1910 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1911 for n, c in enumerate(child_regions_large)
1912 ],
1913 )
1914 # Add a dataset that uses these dimensions to make sure that modifying
1915 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1916 # implement insert with replace=True as delete-then-insert).
1917 dataset_type = DatasetType(
1918 "coadd",
1919 dimensions=["tract", "patch"],
1920 universe=registry.dimensions,
1921 storageClass="Exposure",
1922 )
1923 registry.registerDatasetType(dataset_type)
1924 registry.registerCollection("the_run", CollectionType.RUN)
1925 registry.insertDatasets(
1926 dataset_type,
1927 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1928 run="the_run",
1929 )
1930 # Query for tracts and patches that overlap some "difference" commonSkyPix
1931 # pixels; there should be overlaps, because the database has
1932 # the "large" suite of regions.
1933 self.assertEqual(
1934 {0},
1935 {
1936 data_id["tract"]
1937 for data_id in registry.queryDataIds(
1938 ["tract"],
1939 skymap=skymap_name,
1940 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1941 )
1942 },
1943 )
1944 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1945 self.assertIn(
1946 patch_id,
1947 {
1948 data_id["patch"]
1949 for data_id in registry.queryDataIds(
1950 ["patch"],
1951 skymap=skymap_name,
1952 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1953 )
1954 },
1955 )
1956 # Use syncDimensionData to update the tract region and insertDimensionData
1957 # with replace=True to update the patch regions, switching to the "small" suite.
1958 updated = registry.syncDimensionData(
1959 "tract",
1960 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1961 update=True,
1962 )
1963 self.assertEqual(updated, {"region": parent_region_large})
1964 registry.insertDimensionData(
1965 "patch",
1966 *[
1967 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1968 for n, c in enumerate(child_regions_small)
1969 ],
1970 replace=True,
1971 )
1972 # Query again; there now should be no such overlaps, because the
1973 # database has the "small" suite of regions.
1974 self.assertFalse(
1975 set(
1976 registry.queryDataIds(
1977 ["tract"],
1978 skymap=skymap_name,
1979 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1980 )
1981 )
1982 )
1983 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1984 self.assertNotIn(
1985 patch_id,
1986 {
1987 data_id["patch"]
1988 for data_id in registry.queryDataIds(
1989 ["patch"],
1990 skymap=skymap_name,
1991 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1992 )
1993 },
1994 )
1995 # Update back to the large regions and query one more time.
1996 updated = registry.syncDimensionData(
1997 "tract",
1998 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1999 update=True,
2000 )
2001 self.assertEqual(updated, {"region": parent_region_small})
2002 registry.insertDimensionData(
2003 "patch",
2004 *[
2005 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2006 for n, c in enumerate(child_regions_large)
2007 ],
2008 replace=True,
2009 )
2010 self.assertEqual(
2011 {0},
2012 {
2013 data_id["tract"]
2014 for data_id in registry.queryDataIds(
2015 ["tract"],
2016 skymap=skymap_name,
2017 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2018 )
2019 },
2020 )
2021 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2022 self.assertIn(
2023 patch_id,
2024 {
2025 data_id["patch"]
2026 for data_id in registry.queryDataIds(
2027 ["patch"],
2028 skymap=skymap_name,
2029 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2030 )
2031 },
2032 )
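# The update idioms exercised above, in brief (an illustrative sketch;
# new_region stands in for any of the regions built earlier):
#
#     # syncDimensionData(..., update=True) returns a mapping of the old
#     # column values it replaced, as the assertions above check:
#     updated = registry.syncDimensionData(
#         "tract", {"skymap": skymap_name, "id": 0, "region": new_region}, update=True
#     )
#     # insertDimensionData(..., replace=True) upserts in place, so
#     # existing datasets (like the "coadd" inserted above) keep valid
#     # foreign keys to the updated records.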
2034 def testCalibrationCollections(self):
2035 """Test operations on `~CollectionType.CALIBRATION` collections,
2036 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2037 `SqlRegistry.findDataset`, and
2038 `DataCoordinateQueryResults.findRelatedDatasets`.
2039 """
2040 # Setup - make a Registry, fill it with some datasets in
2041 # non-calibration collections.
2042 registry = self.makeRegistry()
2043 self.loadData(registry, "base.yaml")
2044 self.loadData(registry, "datasets.yaml")
2045 # Set up some timestamps.
2046 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2047 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2048 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2049 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2050 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2051 allTimespans = [
2052 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2053 ]
2054 # Insert some exposure records with timespans between each sequential
2055 # pair of those.
2056 registry.insertDimensionData(
2057 "exposure",
2058 {
2059 "instrument": "Cam1",
2060 "id": 0,
2061 "obs_id": "zero",
2062 "physical_filter": "Cam1-G",
2063 "timespan": Timespan(t1, t2),
2064 },
2065 {
2066 "instrument": "Cam1",
2067 "id": 1,
2068 "obs_id": "one",
2069 "physical_filter": "Cam1-G",
2070 "timespan": Timespan(t2, t3),
2071 },
2072 {
2073 "instrument": "Cam1",
2074 "id": 2,
2075 "obs_id": "two",
2076 "physical_filter": "Cam1-G",
2077 "timespan": Timespan(t3, t4),
2078 },
2079 {
2080 "instrument": "Cam1",
2081 "id": 3,
2082 "obs_id": "three",
2083 "physical_filter": "Cam1-G",
2084 "timespan": Timespan(t4, t5),
2085 },
2086 )
2087 # Get references to some datasets.
2088 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2089 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2090 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2091 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2092 # Register the main calibration collection we'll be working with.
2093 collection = "Cam1/calibs/default"
2094 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2095 # Cannot associate into a calibration collection (no timespan).
2096 with self.assertRaises(CollectionTypeError):
2097 registry.associate(collection, [bias2a])
2098 # Certify 2a dataset with [t2, t4) validity.
2099 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2100 # Test that we can query for this dataset via the new collection, both
2101 # on its own and with a RUN collection.
2102 self.assertEqual(
2103 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2104 {bias2a},
2105 )
2106 self.assertEqual(
2107 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2108 {
2109 bias2a,
2110 bias2b,
2111 bias3b,
2112 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2113 },
2114 )
2115 self.assertEqual(
2116 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2117 {registry.expandDataId(instrument="Cam1", detector=2)},
2118 )
2119 self.assertEqual(
2120 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2121 {
2122 registry.expandDataId(instrument="Cam1", detector=2),
2123 registry.expandDataId(instrument="Cam1", detector=3),
2124 registry.expandDataId(instrument="Cam1", detector=4),
2125 },
2126 )
2127 self.assertEqual(
2128 set(
2129 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2130 "bias", findFirst=True, collections=[collection]
2131 )
2132 ),
2133 {
2134 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2135 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2136 },
2137 )
2138 self.assertEqual(
2139 set(
2140 registry.queryDataIds(
2141 ["exposure", "detector"], instrument="Cam1", detector=2
2142 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2143 ),
2144 {
2145 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2146 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2147 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2148 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2149 },
2150 )
2152 # We should not be able to certify 2b with anything overlapping that
2153 # window.
2154 with self.assertRaises(ConflictingDefinitionError):
2155 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2156 with self.assertRaises(ConflictingDefinitionError):
2157 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2158 with self.assertRaises(ConflictingDefinitionError):
2159 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2160 with self.assertRaises(ConflictingDefinitionError):
2161 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2162 with self.assertRaises(ConflictingDefinitionError):
2163 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2164 with self.assertRaises(ConflictingDefinitionError):
2165 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2166 with self.assertRaises(ConflictingDefinitionError):
2167 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2168 with self.assertRaises(ConflictingDefinitionError):
2169 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2170 # We should be able to certify 3a with a range overlapping that window,
2171 # because it's for a different detector.
2172 # We'll certify 3a over [t1, t3).
2173 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2174 # Now we'll certify 2b and 3b together over [t4, ∞).
2175 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2177 # Fetch all associations and check that they are what we expect.
2178 self.assertCountEqual(
2179 list(
2180 registry.queryDatasetAssociations(
2181 "bias",
2182 collections=[collection, "imported_g", "imported_r"],
2183 )
2184 ),
2185 [
2186 DatasetAssociation(
2187 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2188 collection="imported_g",
2189 timespan=None,
2190 ),
2191 DatasetAssociation(
2192 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2193 collection="imported_r",
2194 timespan=None,
2195 ),
2196 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2197 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2198 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2199 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2200 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2201 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2202 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2203 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2204 ],
2205 )
2207 class Ambiguous:
2208 """Tag class to denote lookups that should be ambiguous."""
2210 pass
2212 def assertLookup(
2213 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2214 ) -> None:
2215 """Local function that asserts that a bias lookup returns the given
2216 expected result.
2217 """
2218 if expected is Ambiguous:
2219 with self.assertRaises((DatasetTypeError, LookupError)):
2220 registry.findDataset(
2221 "bias",
2222 collections=collection,
2223 instrument="Cam1",
2224 detector=detector,
2225 timespan=timespan,
2226 )
2227 else:
2228 self.assertEqual(
2229 expected,
2230 registry.findDataset(
2231 "bias",
2232 collections=collection,
2233 instrument="Cam1",
2234 detector=detector,
2235 timespan=timespan,
2236 ),
2237 )
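# For example (a restatement of one check below): detector=2 with
# timespan (t1, t5) overlaps both bias2a's [t2, t4) and bias2b's [t4, ∞)
# validity ranges, so that lookup must raise, i.e. expected=Ambiguous.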
2239 # Systematically test lookups against expected results.
2240 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2241 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2242 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2243 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2244 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2245 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2246 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2247 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2248 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2249 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2250 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2251 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2252 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2253 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2254 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2255 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2256 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2257 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2258 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2259 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2260 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2261 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2262 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2263 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2264 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2265 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2266 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2267 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2268 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2269 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2270 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2271 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2272 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2273 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2274 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2275 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2276 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2277 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2278 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2279 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2280 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2281 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2283 # Test lookups via temporal joins to exposures.
2284 self.assertEqual(
2285 set(
2286 registry.queryDataIds(
2287 ["exposure", "detector"], instrument="Cam1", detector=2
2288 ).findRelatedDatasets("bias", collections=[collection])
2289 ),
2290 {
2291 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2292 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2293 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2294 },
2295 )
2296 self.assertEqual(
2297 set(
2298 registry.queryDataIds(
2299 ["exposure", "detector"], instrument="Cam1", detector=3
2300 ).findRelatedDatasets("bias", collections=[collection])
2301 ),
2302 {
2303 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2304 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2305 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2306 },
2307 )
2308 self.assertEqual(
2309 set(
2310 registry.queryDataIds(
2311 ["exposure", "detector"], instrument="Cam1", detector=2
2312 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2313 ),
2314 {
2315 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2316 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2317 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2318 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2319 },
2320 )
2321 self.assertEqual(
2322 set(
2323 registry.queryDataIds(
2324 ["exposure", "detector"], instrument="Cam1", detector=3
2325 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2326 ),
2327 {
2328 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2329 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2330 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2331 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2332 },
2333 )
2335 # Decertify [t3, t5) for all data IDs, then run the test lookups again.
2336 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2337 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2338 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2339 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2340 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2341 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2342 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2343 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2344 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2345 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2346 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2347 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2348 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2349 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2350 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2351 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2352 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2353 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2354 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2355 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2356 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2357 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2358 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2359 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2360 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2361 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2362 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2363 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2364 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2365 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2366 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2367 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2368 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2369 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2370 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2371 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2372 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2373 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2374 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2375 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2376 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2377 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2378 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2379 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2380 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2382 # Decertify everything, this time with explicit data IDs, then check
2383 # that no lookups succeed.
2384 registry.decertify(
2385 collection,
2386 "bias",
2387 Timespan(None, None),
2388 dataIds=[
2389 dict(instrument="Cam1", detector=2),
2390 dict(instrument="Cam1", detector=3),
2391 ],
2392 )
2393 for detector in (2, 3):
2394 for timespan in allTimespans:
2395 assertLookup(detector=detector, timespan=timespan, expected=None)
2396 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2397 # those.
2398 registry.certify(
2399 collection,
2400 [bias2a, bias3a],
2401 Timespan(None, None),
2402 )
2403 for timespan in allTimespans:
2404 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2405 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2406 # Decertify just bias2a over [t2, t4).
2407 # This should split a single certification row into two (and leave the
2408 # other existing row, for bias3a, alone).
2409 registry.decertify(
2410 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2411 )
2412 for timespan in allTimespans:
2413 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2414 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2415 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2416 if overlapsBefore and overlapsAfter:
2417 expected = Ambiguous
2418 elif overlapsBefore or overlapsAfter:
2419 expected = bias2a
2420 else:
2421 expected = None
2422 assertLookup(detector=2, timespan=timespan, expected=expected)
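# The decertify interval arithmetic above, in miniature (a pure-Python
# sketch, not the registry implementation; intervals are half-open
# (begin, end) pairs with finite bounds for simplicity):
#
#     def decertify_rows(rows, cut):
#         out = []
#         for begin, end in rows:
#             if begin < cut[0]:
#                 out.append((begin, min(end, cut[0])))  # piece before the cut
#             if end > cut[1]:
#                 out.append((max(begin, cut[1]), end))  # piece after the cut
#         return out
#
# Removing [t2, t4) from a single row spanning everything leaves two
# rows, one ending at t2 and one starting at t4, which is why lookups
# overlapping both sides of the cut become ambiguous.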
2424 def testSkipCalibs(self):
2425 """Test how queries handle skipping of calibration collections."""
2426 registry = self.makeRegistry()
2427 self.loadData(registry, "base.yaml")
2428 self.loadData(registry, "datasets.yaml")
2430 coll_calib = "Cam1/calibs/default"
2431 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2433 # Add all biases to the calibration collection.
2434 # Without this, the logic that prunes dataset subqueries based on
2435 # datasetType-collection summary information will fire before the logic
2436 # we want to test below. This is a good thing (it avoids the dreaded
2437 # NotImplementedError a bit more often) everywhere but here.
2438 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2440 coll_list = [coll_calib, "imported_g", "imported_r"]
2441 chain = "Cam1/chain"
2442 registry.registerCollection(chain, type=CollectionType.CHAINED)
2443 registry.setCollectionChain(chain, coll_list)
2445 # an explicit collection list will raise if findFirst=True or there are
2446 # temporal dimensions
2447 with self.assertRaises(NotImplementedError):
2448 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2449 with self.assertRaises(NotImplementedError):
2450 registry.queryDataIds(
2451 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2452 ).count()
2454 # chain will skip
2455 datasets = list(registry.queryDatasets("bias", collections=chain))
2456 self.assertGreater(len(datasets), 0)
2458 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2459 self.assertGreater(len(dataIds), 0)
2461 # glob will skip too
2462 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2463 self.assertGreater(len(datasets), 0)
2465 # regular expression will skip too
2466 pattern = re.compile(".*")
2467 datasets = list(registry.queryDatasets("bias", collections=pattern))
2468 self.assertGreater(len(datasets), 0)
2470 # ellipsis should work as usual
2471 datasets = list(registry.queryDatasets("bias", collections=...))
2472 self.assertGreater(len(datasets), 0)
2474 # a few tests with findFirst
2475 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2476 self.assertGreater(len(datasets), 0)
2478 def testIngestTimeQuery(self):
2479 registry = self.makeRegistry()
2480 self.loadData(registry, "base.yaml")
2481 dt0 = datetime.utcnow()
2482 self.loadData(registry, "datasets.yaml")
2483 dt1 = datetime.utcnow()
2485 datasets = list(registry.queryDatasets(..., collections=...))
2486 len0 = len(datasets)
2487 self.assertGreater(len0, 0)
2489 where = "ingest_date > T'2000-01-01'"
2490 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2491 len1 = len(datasets)
2492 self.assertEqual(len0, len1)
2494 # no one will ever use this piece of software in 30 years
2495 where = "ingest_date > T'2050-01-01'"
2496 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2497 len2 = len(datasets)
2498 self.assertEqual(len2, 0)
2500 # Check more exact timing to make sure there is no 37-second offset
2501 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2502 # sure that we don't test with higher precision.
2503 tests = [
2504 # format: (timestamp, operator, expected_len)
2505 (dt0 - timedelta(seconds=1), ">", len0),
2506 (dt0 - timedelta(seconds=1), "<", 0),
2507 (dt1 + timedelta(seconds=1), "<", len0),
2508 (dt1 + timedelta(seconds=1), ">", 0),
2509 ]
2510 for dt, op, expect_len in tests:
2511 dt_str = dt.isoformat(sep=" ")
2513 where = f"ingest_date {op} T'{dt_str}'"
2514 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2515 self.assertEqual(len(datasets), expect_len)
2517 # same with bind using datetime or astropy Time
2518 where = f"ingest_date {op} ingest_time"
2519 datasets = list(
2520 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2521 )
2522 self.assertEqual(len(datasets), expect_len)
2524 dt_astropy = astropy.time.Time(dt, format="datetime")
2525 datasets = list(
2526 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2527 )
2528 self.assertEqual(len(datasets), expect_len)
2530 def testTimespanQueries(self):
2531 """Test query expressions involving timespans."""
2532 registry = self.makeRegistry()
2533 self.loadData(registry, "hsc-rc2-subset.yaml")
2534 # All visits in the database; mapping from ID to timespan.
2535 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2536 # Just those IDs, sorted (which is also temporal sorting, because HSC
2537 # visit IDs are monotonically increasing).
2538 ids = sorted(visits.keys())
2539 self.assertGreater(len(ids), 20)
2540 # Pick some quasi-random indexes into `ids` to play with.
2541 i1 = int(len(ids) * 0.1)
2542 i2 = int(len(ids) * 0.3)
2543 i3 = int(len(ids) * 0.6)
2544 i4 = int(len(ids) * 0.8)
2545 # Extract some times from those: just before the beginning of i1 (which
2546 # should be after the end of the previous visit), exactly the
2547 # beginning of i2, just after the beginning of i3 (and before its end),
2548 # and the exact end of i4.
2549 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2550 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2551 t2 = visits[ids[i2]].begin
2552 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2553 self.assertLess(t3, visits[ids[i3]].end)
2554 t4 = visits[ids[i4]].end
2555 # Make sure those are actually in order.
2556 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2558 bind = {
2559 "t1": t1,
2560 "t2": t2,
2561 "t3": t3,
2562 "t4": t4,
2563 "ts23": Timespan(t2, t3),
2564 }
2566 def query(where):
2567 """Return results as a sorted, deduplicated list of visit IDs."""
2568 return sorted(
2569 {
2570 dataId["visit"]
2571 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2572 }
2573 )
2575 # Try a bunch of timespan queries, mixing up the bounds themselves,
2576 # where they appear in the expression, and how we get the timespan into
2577 # the expression.
2579 # t1 is before the start of i1, so this should not include i1.
2580 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2581 # t2 is exactly at the start of i2, but ends are exclusive, so these
2582 # should not include i2.
2583 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2584 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2585 # t3 is in the middle of i3, so this should include i3.
2586 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2587 # This one should not include i3, by the same reasoning.
2588 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2589 # t4 is exactly at the end of i4, so this should include i4.
2590 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2591 # i4's upper bound of t4 is exclusive, so this should not include i4.
2592 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2594 # Now some timespan vs. time scalar queries.
2595 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2596 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2597 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2598 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2599 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2600 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2602 # Empty timespans should not overlap anything.
2603 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
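# The pattern above relies on Timespan's half-open [begin, end) semantics:
# a timespan includes its begin, excludes its end, treats None bounds as
# unbounded, and an empty timespan overlaps nothing. A sketch of the same
# checks the expressions perform, done directly in Python:
#
#     Timespan(t2, t3).overlaps(visits[ids[i2]])        # True: t2 == begin
#     Timespan(None, t1).overlaps(visits[ids[i1]])      # False: t1 < begin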
2605 def testCollectionSummaries(self):
2606 """Test recording and retrieval of collection summaries."""
2607 self.maxDiff = None
2608 registry = self.makeRegistry()
2609 # Importing datasets from yaml should go through the code path where
2610 # we update collection summaries as we insert datasets.
2611 self.loadData(registry, "base.yaml")
2612 self.loadData(registry, "datasets.yaml")
2613 flat = registry.getDatasetType("flat")
2614 expected1 = CollectionSummary()
2615 expected1.dataset_types.add(registry.getDatasetType("bias"))
2616 expected1.add_data_ids(
2617 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2618 )
2619 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2620 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2621 # Create a chained collection with both of the imported runs; the
2622 # summary should be the same, because it's a union with itself.
2623 chain = "chain"
2624 registry.registerCollection(chain, CollectionType.CHAINED)
2625 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2626 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2627 # Associate flats only into a tagged collection and a calibration
2628 # collection to check summaries of those.
2629 tag = "tag"
2630 registry.registerCollection(tag, CollectionType.TAGGED)
2631 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2632 calibs = "calibs"
2633 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2634 registry.certify(
2635 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2636 )
2637 expected2 = expected1.copy()
2638 expected2.dataset_types.discard("bias")
2639 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2640 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2641 # Explicitly calling SqlRegistry.refresh() should load those same
2642 # summaries, via a totally different code path.
2643 registry.refresh()
2644 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2645 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2646 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2647 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
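# The model these assertions assume: a CollectionSummary records which
# dataset types and which governor-dimension values (here just
# instrument=Cam1) may appear in a collection, so a CHAINED collection's
# summary is the union of its children's, and associating only flats into
# the TAGGED and CALIBRATION collections drops "bias" from theirs.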
2649 def testBindInQueryDatasets(self):
2650 """Test that the bind parameter is correctly forwarded in
2651 queryDatasets recursion.
2652 """
2653 registry = self.makeRegistry()
2654 # Importing datasets from yaml should go through the code path where
2655 # we update collection summaries as we insert datasets.
2656 self.loadData(registry, "base.yaml")
2657 self.loadData(registry, "datasets.yaml")
2658 self.assertEqual(
2659 set(registry.queryDatasets("flat", band="r", collections=...)),
2660 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2661 )
2663 def testQueryIntRangeExpressions(self):
2664 """Test integer range expressions in ``where`` arguments.
2666 Note that our expressions use inclusive stop values, unlike Python's.
2667 """
2668 registry = self.makeRegistry()
2669 self.loadData(registry, "base.yaml")
2670 self.assertEqual(
2671 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2672 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2673 )
2674 self.assertEqual(
2675 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2676 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2677 )
2678 self.assertEqual(
2679 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2680 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2681 )
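# The range syntax in these expressions uses an inclusive stop, so
# "detector IN (A..B:S)" matches the same detectors as Python's
# range(A, B + 1, S); e.g. (1..4:2) -> {1, 3} and (2..4:2) -> {2, 4}.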
2683 def testQueryResultSummaries(self):
2684 """Test summary methods like `count`, `any`, and `explain_no_results`
2685 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2686 """
2687 registry = self.makeRegistry()
2688 self.loadData(registry, "base.yaml")
2689 self.loadData(registry, "datasets.yaml")
2690 self.loadData(registry, "spatial.yaml")
2691 # Default test dataset has two collections, each with both flats and
2692 # biases. Add a new collection with only biases.
2693 registry.registerCollection("biases", CollectionType.TAGGED)
2694 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2695 # First query yields two results, and involves no postprocessing.
2696 query1 = registry.queryDataIds(["physical_filter"], band="r")
2697 self.assertTrue(query1.any(execute=False, exact=False))
2698 self.assertTrue(query1.any(execute=True, exact=False))
2699 self.assertTrue(query1.any(execute=True, exact=True))
2700 self.assertEqual(query1.count(exact=False), 2)
2701 self.assertEqual(query1.count(exact=True), 2)
2702 self.assertFalse(list(query1.explain_no_results()))
2703 # Second query should yield no results, which we should see when
2704 # we attempt to expand the data ID.
2705 query2 = registry.queryDataIds(["physical_filter"], band="h")
2706 # There's no execute=False, exact=False test here because the behavior
2707 # is not something we want to guarantee in this case (and exact=False
2708 # says either answer is legal).
2709 self.assertFalse(query2.any(execute=True, exact=False))
2710 self.assertFalse(query2.any(execute=True, exact=True))
2711 self.assertEqual(query2.count(exact=False), 0)
2712 self.assertEqual(query2.count(exact=True), 0)
2713 self.assertTrue(list(query2.explain_no_results()))
2714 # These queries yield no results due to various problems that can be
2715 # spotted prior to execution, yielding helpful diagnostics.
2716 base_query = registry.queryDataIds(["detector", "physical_filter"])
2717 queries_and_snippets = [
2718 (
2719 # Dataset type name doesn't match any existing dataset types.
2720 registry.queryDatasets("nonexistent", collections=...),
2721 ["nonexistent"],
2722 ),
2723 (
2724 # Dataset type object isn't registered.
2725 registry.queryDatasets(
2726 DatasetType(
2727 "nonexistent",
2728 dimensions=["instrument"],
2729 universe=registry.dimensions,
2730 storageClass="Image",
2731 ),
2732 collections=...,
2733 ),
2734 ["nonexistent"],
2735 ),
2736 (
2737 # No datasets of this type in this collection.
2738 registry.queryDatasets("flat", collections=["biases"]),
2739 ["flat", "biases"],
2740 ),
2741 (
2742 # No datasets of this type in this collection.
2743 base_query.findDatasets("flat", collections=["biases"]),
2744 ["flat", "biases"],
2745 ),
2746 (
2747 # No collections matching at all.
2748 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2749 ["potato"],
2750 ),
2751 ]
2752 # The behavior of these additional queries is slated to change in the
2753 # future, so we also check for deprecation warnings.
2754 with self.assertWarns(FutureWarning):
2755 queries_and_snippets.append(
2756 (
2757 # Dataset type name doesn't match any existing dataset
2758 # types.
2759 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2760 ["nonexistent"],
2761 )
2762 )
2763 with self.assertWarns(FutureWarning):
2764 queries_and_snippets.append(
2765 (
2766 # Dataset type name doesn't match any existing dataset
2767 # types.
2768 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2769 ["nonexistent"],
2770 )
2771 )
2772 for query, snippets in queries_and_snippets:
2773 self.assertFalse(query.any(execute=False, exact=False))
2774 self.assertFalse(query.any(execute=True, exact=False))
2775 self.assertFalse(query.any(execute=True, exact=True))
2776 self.assertEqual(query.count(exact=False), 0)
2777 self.assertEqual(query.count(exact=True), 0)
2778 messages = list(query.explain_no_results())
2779 self.assertTrue(messages)
2780 # Want all expected snippets to appear in at least one message.
2781 self.assertTrue(
2782 any(
2783 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2784 ),
2785 messages,
2786 )
2788 # This query does yield results, but should also emit a warning because
2789 # passing dataset type patterns to queryDataIds is deprecated; just
2790 # check for the warning.
2791 with self.assertWarns(FutureWarning):
2792 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2794 # These queries yield no results due to problems that can be identified
2795 # by cheap follow-up queries, yielding helpful diagnostics.
2796 for query, snippets in [
2797 (
2798 # No records for one of the involved dimensions.
2799 registry.queryDataIds(["subfilter"]),
2800 ["no rows", "subfilter"],
2801 ),
2802 (
2803 # No records for one of the involved dimensions.
2804 registry.queryDimensionRecords("subfilter"),
2805 ["no rows", "subfilter"],
2806 ),
2807 ]:
2808 self.assertFalse(query.any(execute=True, exact=False))
2809 self.assertFalse(query.any(execute=True, exact=True))
2810 self.assertEqual(query.count(exact=True), 0)
2811 messages = list(query.explain_no_results())
2812 self.assertTrue(messages)
2813 # Want all expected snippets to appear in at least one message.
2814 self.assertTrue(
2815 any(
2816 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2817 ),
2818 messages,
2819 )
2821 # This query yields four overlaps in the database, but one is filtered
2822 # out in postprocessing. The count queries aren't accurate because
2823 # they don't account for duplication that happens due to an internal
2824 # join against commonSkyPix.
2825 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2826 self.assertEqual(
2827 {
2828 DataCoordinate.standardize(
2829 instrument="Cam1",
2830 skymap="SkyMap1",
2831 visit=v,
2832 tract=t,
2833 universe=registry.dimensions,
2834 )
2835 for v, t in [(1, 0), (2, 0), (2, 1)]
2836 },
2837 set(query3),
2838 )
2839 self.assertTrue(query3.any(execute=False, exact=False))
2840 self.assertTrue(query3.any(execute=True, exact=False))
2841 self.assertTrue(query3.any(execute=True, exact=True))
2842 self.assertGreaterEqual(query3.count(exact=False), 4)
2843 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2844 self.assertFalse(list(query3.explain_no_results()))
2845 # This query yields overlaps in the database, but all are filtered
2846 # out in postprocessing. The count queries again aren't very useful.
2847 # We have to use `where=` here to avoid an optimization that
2848 # (currently) skips the spatial postprocess-filtering because it
2849 # recognizes that no spatial join is necessary. That's not ideal, but
2850 # fixing it is out of scope for this ticket.
2851 query4 = registry.queryDataIds(
2852 ["visit", "tract"],
2853 instrument="Cam1",
2854 skymap="SkyMap1",
2855 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2856 )
2857 self.assertFalse(set(query4))
2858 self.assertTrue(query4.any(execute=False, exact=False))
2859 self.assertTrue(query4.any(execute=True, exact=False))
2860 self.assertFalse(query4.any(execute=True, exact=True))
2861 self.assertGreaterEqual(query4.count(exact=False), 1)
2862 self.assertEqual(query4.count(exact=True, discard=True), 0)
2863 messages = query4.explain_no_results()
2864 self.assertTrue(messages)
2865 self.assertTrue(any("overlap" in message for message in messages))
2866 # This query should yield results from one dataset type but not the
2867 # other, which is not registered.
2868 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2869 self.assertTrue(set(query5))
2870 self.assertTrue(query5.any(execute=False, exact=False))
2871 self.assertTrue(query5.any(execute=True, exact=False))
2872 self.assertTrue(query5.any(execute=True, exact=True))
2873 self.assertGreaterEqual(query5.count(exact=False), 1)
2874 self.assertGreaterEqual(query5.count(exact=True), 1)
2875 self.assertFalse(list(query5.explain_no_results()))
2876 # This query applies a selection that yields no results, fully in the
2877 # database. Explaining why it fails involves traversing the relation
2878 # tree and running a LIMIT 1 query at each level that has the potential
2879 # to remove rows.
2880 query6 = registry.queryDimensionRecords(
2881 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2882 )
2883 self.assertEqual(query6.count(exact=True), 0)
2884 messages = query6.explain_no_results()
2885 self.assertTrue(messages)
2886 self.assertTrue(any("no-purpose" in message for message in messages))
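# A compact restatement of the contract these assertions exercise, as this
# test suite assumes it: any(execute=False, exact=False) may answer from
# query structure alone without touching the database;
# any(execute=True, exact=False) runs SQL but may ignore postprocessing;
# any(execute=True, exact=True) accounts for postprocess filtering.
# Similarly, count(exact=False) can overcount rows that postprocessing
# would drop, while count(exact=True, discard=True) iterates over (and
# discards) result rows to obtain an exact count.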
2888 def testQueryDataIdsExpressionError(self):
2889 """Test error checking of 'where' expressions in queryDataIds."""
2890 registry = self.makeRegistry()
2891 self.loadData(registry, "base.yaml")
2892 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2893 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2894 registry.queryDataIds(["detector"], where="foo.bar = 12")
2895 with self.assertRaisesRegex(
2896 LookupError, "Dimension element name cannot be inferred in this context."
2897 ):
2898 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2900 def testQueryDataIdsOrderBy(self):
2901 """Test order_by and limit on result returned by queryDataIds()."""
2902 registry = self.makeRegistry()
2903 self.loadData(registry, "base.yaml")
2904 self.loadData(registry, "datasets.yaml")
2905 self.loadData(registry, "spatial.yaml")
2907 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2908 return registry.queryDataIds(
2909 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2910 )
2912 Test = namedtuple(
2913 "testQueryDataIdsOrderByTest",
2914 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2915 defaults=(None, None, None),
2916 )
2918 test_data = (
2919 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2920 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2921 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2922 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2923 Test(
2924 "tract.id,visit.id",
2925 "tract,visit",
2926 ((0, 1), (0, 1), (0, 2)),
2927 limit=(3,),
2928 ),
2929 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2930 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2931 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2932 Test(
2933 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2934 ),
2935 Test(
2936 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2937 ),
2938 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2939 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2940 Test(
2941 "tract,-timespan.begin,timespan.end",
2942 "tract,visit",
2943 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2944 ),
2945 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2946 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2947 Test(
2948 "tract,detector",
2949 "tract,detector",
2950 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2951 datasets="flat",
2952 collections="imported_r",
2953 ),
2954 Test(
2955 "tract,detector.full_name",
2956 "tract,detector",
2957 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2958 datasets="flat",
2959 collections="imported_r",
2960 ),
2961 Test(
2962 "tract,detector.raft,detector.name_in_raft",
2963 "tract,detector",
2964 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2965 datasets="flat",
2966 collections="imported_r",
2967 ),
2968 )
2970 for test in test_data:
2971 order_by = test.order_by.split(",")
2972 keys = test.keys.split(",")
2973 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2974 if test.limit is not None:
2975 query = query.limit(*test.limit)
2976 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2977 self.assertEqual(dataIds, test.result)
2979 # Materializing a query with order_by/limit applied is unsupported.
2980 query = do_query(keys).order_by(*order_by)
2981 if test.limit is not None:
2982 query = query.limit(*test.limit)
2983 with self.assertRaises(RelationalAlgebraError):
2984 with query.materialize():
2985 pass
2987 # errors in a name
2988 for order_by in ("", "-"):
2989 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2990 list(do_query().order_by(order_by))
2992 for order_by in ("undimension.name", "-undimension.name"):
2993 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2994 list(do_query().order_by(order_by))
2996 for order_by in ("attract", "-attract"):
2997 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2998 list(do_query().order_by(order_by))
3000 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
3001 list(do_query(("exposure", "visit")).order_by("exposure_time"))
3003 with self.assertRaisesRegex(
3004 ValueError,
3005 r"Timespan exists in more than one dimension element \(exposure, visit\); "
3006 r"qualify timespan with specific dimension name\.",
3007 ):
3008 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3010 with self.assertRaisesRegex(
3011 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3012 ):
3013 list(do_query("tract").order_by("timespan.begin"))
3015 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3016 list(do_query("tract").order_by("tract.timespan.begin"))
3018 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3019 list(do_query("tract").order_by("tract.name"))
3021 with self.assertRaisesRegex(
3022 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3023 ):
3024 list(do_query("visit").order_by("timestamp.begin"))
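# The order_by() grammar exercised above, in brief: each comma-separated
# key is an optional leading "-" (descending) plus either a dimension name
# ("visit"), a metadata field that may be qualified when ambiguous
# ("exposure_time" vs. "visit.exposure_time"), or a timespan bound
# ("timespan.begin", qualified as "visit.timespan.begin" when several
# temporal elements are in play). For example (illustrative):
#
#     registry.queryDataIds(["visit", "tract"], instrument="Cam1",
#                           skymap="SkyMap1").order_by("tract", "-visit")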
3026 def testQueryDataIdsGovernorExceptions(self):
3027 """Test exceptions raised by queryDataIds() for incorrect governors."""
3028 registry = self.makeRegistry()
3029 self.loadData(registry, "base.yaml")
3030 self.loadData(registry, "datasets.yaml")
3031 self.loadData(registry, "spatial.yaml")
3033 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3034 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3036 Test = namedtuple(
3037 "testQueryDataIdExceptionsTest",
3038 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3039 defaults=(None, None, None, {}, None, 0),
3040 )
3042 test_data = (
3043 Test("tract,visit", count=6),
3044 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3045 Test(
3046 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3047 ),
3048 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3049 Test(
3050 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3051 ),
3052 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3053 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3054 Test(
3055 "tract,visit",
3056 where="instrument=cam AND skymap=map",
3057 bind={"cam": "Cam1", "map": "SkyMap1"},
3058 count=6,
3059 ),
3060 Test(
3061 "tract,visit",
3062 where="instrument=cam AND skymap=map",
3063 bind={"cam": "Cam", "map": "SkyMap"},
3064 exception=DataIdValueError,
3065 ),
3066 )
3068 for test in test_data:
3069 dimensions = test.dimensions.split(",")
3070 if test.exception:
3071 with self.assertRaises(test.exception):
3072 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3073 else:
3074 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3075 self.assertEqual(query.count(discard=True), test.count)
3077 # and materialize
3078 if test.exception:
3079 with self.assertRaises(test.exception):
3080 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3081 with query.materialize() as materialized:
3082 materialized.count(discard=True)
3083 else:
3084 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3085 with query.materialize() as materialized:
3086 self.assertEqual(materialized.count(discard=True), test.count)
3088 def testQueryDimensionRecordsOrderBy(self):
3089 """Test order_by and limit on result returned by
3090 queryDimensionRecords().
3091 """
3092 registry = self.makeRegistry()
3093 self.loadData(registry, "base.yaml")
3094 self.loadData(registry, "datasets.yaml")
3095 self.loadData(registry, "spatial.yaml")
3097 def do_query(element, datasets=None, collections=None):
3098 return registry.queryDimensionRecords(
3099 element, instrument="Cam1", datasets=datasets, collections=collections
3100 )
3102 query = do_query("detector")
3103 self.assertEqual(len(list(query)), 4)
3105 Test = namedtuple(
3106 "testQueryDataIdsOrderByTest",
3107 ("element", "order_by", "result", "limit", "datasets", "collections"),
3108 defaults=(None, None, None),
3109 )
3111 test_data = (
3112 Test("detector", "detector", (1, 2, 3, 4)),
3113 Test("detector", "-detector", (4, 3, 2, 1)),
3114 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3115 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3116 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3117 Test("visit", "visit", (1, 2)),
3118 Test("visit", "-visit.id", (2, 1)),
3119 Test("visit", "zenith_angle", (1, 2)),
3120 Test("visit", "-visit.name", (2, 1)),
3121 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3122 )
3124 for test in test_data:
3125 order_by = test.order_by.split(",")
3126 query = do_query(test.element).order_by(*order_by)
3127 if test.limit is not None:
3128 query = query.limit(*test.limit)
3129 dataIds = tuple(rec.id for rec in query)
3130 self.assertEqual(dataIds, test.result)
3132 # errors in a name
3133 for order_by in ("", "-"):
3134 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3135 list(do_query("detector").order_by(order_by))
3137 for order_by in ("undimension.name", "-undimension.name"):
3138 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3139 list(do_query("detector").order_by(order_by))
3141 for order_by in ("attract", "-attract"):
3142 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3143 list(do_query("detector").order_by(order_by))
3145 for order_by in ("timestamp.begin", "-timestamp.begin"):
3146 with self.assertRaisesRegex(
3147 ValueError,
3148 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3149 r"perhaps you meant 'timespan.begin'\?",
3150 ):
3151 list(do_query("visit").order_by(order_by))
3153 def testQueryDimensionRecordsExceptions(self):
3154 """Test exceptions raised by queryDimensionRecords()."""
3155 registry = self.makeRegistry()
3156 self.loadData(registry, "base.yaml")
3157 self.loadData(registry, "datasets.yaml")
3158 self.loadData(registry, "spatial.yaml")
3160 result = registry.queryDimensionRecords("detector")
3161 self.assertEqual(result.count(), 4)
3162 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3163 self.assertEqual(result.count(), 4)
3164 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3165 self.assertEqual(result.count(), 4)
3166 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3167 self.assertEqual(result.count(), 4)
3168 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3169 self.assertEqual(result.count(), 4)
3171 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3172 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3173 result.count()
3175 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3176 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3177 result.count()
3179 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3180 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3181 result.count()
3183 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3184 result = registry.queryDimensionRecords(
3185 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3186 )
3187 result.count()
3189 def testDatasetConstrainedDimensionRecordQueries(self):
3190 """Test that queryDimensionRecords works even when given a dataset
3191 constraint whose dimensions extend beyond the requested dimension
3192 element's.
3193 """
3194 registry = self.makeRegistry()
3195 self.loadData(registry, "base.yaml")
3196 self.loadData(registry, "datasets.yaml")
3197 # Query for physical_filter dimension records, using a dataset type
3198 # whose dimensions include physical_filter as well as others.
3199 records = registry.queryDimensionRecords(
3200 "physical_filter",
3201 datasets=["flat"],
3202 collections="imported_r",
3203 )
3204 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3205 # Trying to constrain by all dataset types is an error.
3206 with self.assertRaises(TypeError):
3207 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3209 def testSkyPixDatasetQueries(self):
3210 """Test that we can build queries involving skypix dimensions as long
3211 as a dataset type that uses those dimensions is included.
3212 """
3213 registry = self.makeRegistry()
3214 self.loadData(registry, "base.yaml")
3215 dataset_type = DatasetType(
3216 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3217 )
3218 registry.registerDatasetType(dataset_type)
3219 run = "r"
3220 registry.registerRun(run)
3221 # First try queries where there are no datasets; the concern is whether
3222 # we can even build and execute these queries without raising, even
3223 # when "doomed" query shortcuts are in play.
3224 self.assertFalse(
3225 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3226 )
3227 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3228 # Now add a dataset and see that we can get it back.
3229 htm7 = registry.dimensions.skypix["htm"][7].pixelization
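# pixelization.universe() is a RangeSet of all valid pixel indices;
# [0][0] picks the first index of its first contiguous range, i.e. an
# arbitrary valid htm7 pixel for the data ID below.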
3230 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3231 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3232 self.assertEqual(
3233 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3234 {data_id},
3235 )
3236 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3238 def testDatasetIdFactory(self):
3239 """Simple test for DatasetIdFactory, mostly to catch potential changes
3240 in its API.
3241 """
3242 registry = self.makeRegistry()
3243 factory = DatasetIdFactory()
3244 dataset_type = DatasetType(
3245 "datasetType",
3246 dimensions=["detector", "instrument"],
3247 universe=registry.dimensions,
3248 storageClass="int",
3249 )
3250 run = "run"
3251 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
3253 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3254 self.assertIsInstance(datasetId, uuid.UUID)
3255 self.assertEqual(datasetId.version, 4)
3257 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3258 self.assertIsInstance(datasetId, uuid.UUID)
3259 self.assertEqual(datasetId.version, 5)
3261 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3262 self.assertIsInstance(datasetId, uuid.UUID)
3263 self.assertEqual(datasetId.version, 5)
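# The version checks above encode the factory's contract: UNIQUE yields a
# random (version 4) UUID on every call, while DATAID_TYPE and
# DATAID_TYPE_RUN yield deterministic name-based (version 5) UUIDs, so the
# same dataset type and data ID (and run, in the latter mode) always
# reproduce the same dataset ID.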
3265 def testExposureQueries(self):
3266 """Test query methods using arguments sourced from the exposure log
3267 service.
3269 The most complete test dataset currently available to daf_butler tests
3270 is the hsc-rc2-subset.yaml export (which is unfortunately distinct
3271 from the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3272 dimension records as it was focused on providing nontrivial spatial
3273 overlaps between visit+detector and tract+patch. So in this test we
3274 need to translate queries that originally used the exposure dimension
3275 to use the (very similar) visit dimension instead.
3276 """
3277 registry = self.makeRegistry()
3278 self.loadData(registry, "hsc-rc2-subset.yaml")
3279 self.assertEqual(
3280 [
3281 record.id
3282 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3283 .order_by("id")
3284 .limit(5)
3285 ],
3286 [318, 322, 326, 330, 332],
3287 )
3288 self.assertEqual(
3289 [
3290 data_id["visit"]
3291 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3292 ],
3293 [318, 322, 326, 330, 332],
3294 )
3295 self.assertEqual(
3296 [
3297 record.id
3298 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3299 .order_by("full_name")
3300 .limit(5)
3301 ],
3302 [73, 72, 71, 70, 65],
3303 )
3304 self.assertEqual(
3305 [
3306 data_id["detector"]
3307 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3308 .order_by("full_name")
3309 .limit(5)
3310 ],
3311 [73, 72, 71, 70, 65],
3312 )
3314 def test_long_query_names(self) -> None:
3315 """Test that queries involving very long names are handled correctly.
3317 This is especially important for PostgreSQL, which truncates
3318 identifiers longer than 63 bytes, but it's worth testing for all DBs.
3319 """
3320 registry = self.makeRegistry()
3321 name = "abcd" * 17
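# 68 characters: long enough to exceed PostgreSQL's identifier limit.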
3322 registry.registerDatasetType(
3323 DatasetType(
3324 name,
3325 dimensions=(),
3326 storageClass="Exposure",
3327 universe=registry.dimensions,
3328 )
3329 )
3330 # Need to search more than one collection that actually contains a
3331 # matching dataset; otherwise an optimization makes findFirst=True a
3332 # no-op and would sidestep any bugs due to truncation.
3333 run1 = "run1"
3334 registry.registerRun(run1)
3335 run2 = "run2"
3336 registry.registerRun(run2)
3337 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1)
3338 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2)
3339 self.assertEqual(
3340 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3341 {ref1},
3342 )
3344 def test_skypix_constraint_queries(self) -> None:
3345 """Test queries spatially constrained by a skypix data ID."""
3346 registry = self.makeRegistry()
3347 self.loadData(registry, "hsc-rc2-subset.yaml")
3348 patch_regions = {
3349 (data_id["tract"], data_id["patch"]): data_id.region
3350 for data_id in registry.queryDataIds(["patch"]).expanded()
3351 }
3352 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3353 # This check ensures the test doesn't become trivial due to a config
3354 # change; if it does, just pick a different HTM level.
3355 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3356 # Gather all skypix IDs that definitely overlap at least one of these
3357 # patches.
3358 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3359 for patch_region in patch_regions.values():
3360 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3361 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3362 # and does not overlap at least one other patch.
3363 for skypix_id in itertools.chain.from_iterable(
3364 range(begin, end) for begin, end in relevant_skypix_ids
3365 ):
3366 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3367 overlapping_patches = {
3368 patch_key
3369 for patch_key, patch_region in patch_regions.items()
3370 if not patch_region.isDisjointFrom(skypix_region)
3371 }
3372 if overlapping_patches and overlapping_patches != patch_regions.keys():
3373 break
3374 else:
3375 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3376 self.assertEqual(
3377 {
3378 (data_id["tract"], data_id["patch"])
3379 for data_id in registry.queryDataIds(
3380 ["patch"],
3381 dataId={skypix_dimension.name: skypix_id},
3382 )
3383 },
3384 overlapping_patches,
3385 )
3386 # Test that a three-way join that includes the common skypix system in
3387 # the dimensions doesn't generate redundant join terms in the query.
3388 full_data_ids = set(
3389 registry.queryDataIds(
3390 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3391 ).expanded()
3392 )
3393 self.assertGreater(len(full_data_ids), 0)
3394 for data_id in full_data_ids:
3395 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3396 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3398 def test_spatial_constraint_queries(self) -> None:
3399 """Test queries in which one spatial dimension in the constraint (data
3400 ID or ``where`` string) constrains a different spatial dimension in the
3401 query result columns.
3402 """
3403 registry = self.makeRegistry()
3404 self.loadData(registry, "hsc-rc2-subset.yaml")
3405 patch_regions = {
3406 (data_id["tract"], data_id["patch"]): data_id.region
3407 for data_id in registry.queryDataIds(["patch"]).expanded()
3408 }
3409 observation_regions = {
3410 (data_id["visit"], data_id["detector"]): data_id.region
3411 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3412 }
3413 all_combos = {
3414 (patch_key, observation_key)
3415 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3416 }
3417 overlapping_combos = {
3418 (patch_key, observation_key)
3419 for patch_key, observation_key in all_combos
3420 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3421 }
3422 # Check a direct spatial join with no constraint first.
3423 self.assertEqual(
3424 {
3425 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3426 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3427 },
3428 overlapping_combos,
3429 )
3430 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3431 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3432 for patch_key, observation_key in overlapping_combos:
3433 overlaps_by_patch[patch_key].add(observation_key)
3434 overlaps_by_observation[observation_key].add(patch_key)
3435 # Find patches and observations that overlap at least one member of
3436 # the other set, but not all of them.
3437 nontrivial_patch = next(
3438 iter(
3439 patch_key
3440 for patch_key, observation_keys in overlaps_by_patch.items()
3441 if observation_keys and observation_keys != observation_regions.keys()
3442 )
3443 )
3444 nontrivial_observation = next(
3445 iter(
3446 observation_key
3447 for observation_key, patch_keys in overlaps_by_observation.items()
3448 if patch_keys and patch_keys != patch_regions.keys()
3449 )
3450 )
3451 # Use the nontrivial patches and observations as constraints on the
3452 # other dimensions in various ways, first via a 'where' expression.
3453 # It's better in general to use 'bind' instead of f-strings, but these
3454 # are all integers so there are no quoting concerns.
3455 self.assertEqual(
3456 {
3457 (data_id["visit"], data_id["detector"])
3458 for data_id in registry.queryDataIds(
3459 ["visit", "detector"],
3460 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3461 skymap="hsc_rings_v1",
3462 )
3463 },
3464 overlaps_by_patch[nontrivial_patch],
3465 )
3466 self.assertEqual(
3467 {
3468 (data_id["tract"], data_id["patch"])
3469 for data_id in registry.queryDataIds(
3470 ["patch"],
3471 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3472 instrument="HSC",
3473 )
3474 },
3475 overlaps_by_observation[nontrivial_observation],
3476 )
3477 # and then via the dataId argument.
3478 self.assertEqual(
3479 {
3480 (data_id["visit"], data_id["detector"])
3481 for data_id in registry.queryDataIds(
3482 ["visit", "detector"],
3483 dataId={
3484 "tract": nontrivial_patch[0],
3485 "patch": nontrivial_patch[1],
3486 },
3487 skymap="hsc_rings_v1",
3488 )
3489 },
3490 overlaps_by_patch[nontrivial_patch],
3491 )
3492 self.assertEqual(
3493 {
3494 (data_id["tract"], data_id["patch"])
3495 for data_id in registry.queryDataIds(
3496 ["patch"],
3497 dataId={
3498 "visit": nontrivial_observation[0],
3499 "detector": nontrivial_observation[1],
3500 },
3501 instrument="HSC",
3502 )
3503 },
3504 overlaps_by_observation[nontrivial_observation],
3505 )
3507 def test_query_projection_drop_postprocessing(self) -> None:
3508 """Test that projections and deduplications on query objects can
3509 drop post-query region filtering to ensure the query remains in
3510 the SQL engine.
3511 """
3512 registry = self.makeRegistry()
3513 self.loadData(registry, "base.yaml")
3514 self.loadData(registry, "spatial.yaml")
3516 def pop_transfer(tree: Relation) -> Relation:
3517 """If a relation tree terminates with a transfer to a new engine,
3518 return the relation prior to that transfer. If not, return the
3519 original relation.
3520 """
3521 match tree:
3522 case Transfer(target=target):
3523 return target
3524 case _:
3525 return tree
3527 # There's no public way to get a Query object yet, so we get one from a
3528 # DataCoordinateQueryResults private attribute. When a public API is
3529 # available this test should use it.
3530 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3531 # We expect this query to terminate in the iteration engine originally,
3532 # because region-filtering is necessary.
3533 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3534 # If we deduplicate, we usually have to do that downstream of the
3535 # filtering. That means the deduplication has to happen in the
3536 # iteration engine.
3537 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3538 # If we pass drop_postprocessing, we instead drop the region filtering
3539 # so the deduplication can happen in SQL (though there might still be
3540 # transfer to iteration at the tail of the tree that we can ignore;
3541 # that's what the pop_transfer takes care of here).
3542 self.assertIsInstance(
3543 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3544 sql.Engine,
3545 )
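# These assertions read the tail of the relation tree directly: a Relation
# may end in a Transfer whose .target is the upstream relation in another
# engine, and .engine identifies where the terminal operation executes:
# sql.Engine for work done in the database, iteration.Engine for
# Python-side postprocessing such as region filtering.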
3547 def test_query_find_datasets_drop_postprocessing(self) -> None:
3548 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3549 problems with the FindFirstDataset relation operation.
3550 """
3551 # Setup: load some visit, tract, and patch records, and insert two
3552 # datasets with dimensions {visit, patch}, with one in each of two
3553 # RUN collections.
3554 registry = self.makeRegistry()
3555 self.loadData(registry, "base.yaml")
3556 self.loadData(registry, "spatial.yaml")
3557 storage_class = StorageClass("Warpy")
3558 registry.storageClasses.registerStorageClass(storage_class)
3559 dataset_type = DatasetType(
3560 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3561 )
3562 registry.registerDatasetType(dataset_type)
3563 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3564 registry.registerRun("run1")
3565 registry.registerRun("run2")
3566 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3567 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3568 # Query for the dataset using queryDataIds(...).findDatasets(...)
3569 # against only one of the two collections. This should work even
3570 # though the relation returned by queryDataIds ends with
3571 # iteration-engine region-filtering, because we can recognize before
3572 # running the query that there is only one collection to search and
3573 # hence the (default) findFirst=True is irrelevant, and joining in the
3574 # dataset query commutes past the iteration-engine postprocessing.
3575 query1 = registry.queryDataIds(
3576 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3577 )
3578 self.assertEqual(
3579 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3580 {ref1},
3581 )
3582 # Query for the dataset using queryDataIds(...).findDatasets(...)
3583 # against both collections. This can only work if the FindFirstDataset
3584 # operation can be commuted past the iteration-engine operations into SQL.
3585 query2 = registry.queryDataIds(
3586 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3587 )
3588 self.assertEqual(
3589 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3590 {ref2},
3591 )
3593 def test_query_empty_collections(self) -> None:
3594 """Test for registry query methods with empty collections. The methods
3595 should return an empty result set (or None when applicable) and provide
3596 "doomed" diagnostics.
3597 """
3598 registry = self.makeRegistry()
3599 self.loadData(registry, "base.yaml")
3600 self.loadData(registry, "datasets.yaml")
3602 # Tests for registry.findDataset()
3603 with self.assertRaises(NoDefaultCollectionError):
3604 registry.findDataset("bias", instrument="Cam1", detector=1)
3605 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3606 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3608 # Tests for registry.queryDatasets()
3609 with self.assertRaises(NoDefaultCollectionError):
3610 registry.queryDatasets("bias")
3611 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3613 result = registry.queryDatasets("bias", collections=[])
3614 self.assertEqual(len(list(result)), 0)
3615 messages = list(result.explain_no_results())
3616 self.assertTrue(messages)
3617 self.assertTrue(any("because collection list is empty" in message for message in messages))
3619 # Tests for registry.queryDataIds()
3620 with self.assertRaises(NoDefaultCollectionError):
3621 registry.queryDataIds("detector", datasets="bias")
3622 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3624 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3625 self.assertEqual(len(list(result)), 0)
3626 messages = list(result.explain_no_results())
3627 self.assertTrue(messages)
3628 self.assertTrue(any("because collection list is empty" in message for message in messages))
3630 # Tests for registry.queryDimensionRecords()
3631 with self.assertRaises(NoDefaultCollectionError):
3632 registry.queryDimensionRecords("detector", datasets="bias")
3633 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3635 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3636 self.assertEqual(len(list(result)), 0)
3637 messages = list(result.explain_no_results())
3638 self.assertTrue(messages)
3639 self.assertTrue(any("because collection list is empty" in message for message in messages))
3641 def test_dataset_followup_spatial_joins(self) -> None:
3642 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3643 is involved.
3644 """
3645 registry = self.makeRegistry()
3646 self.loadData(registry, "base.yaml")
3647 self.loadData(registry, "spatial.yaml")
3648 pvi_dataset_type = DatasetType(
3649 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3650 )
3651 registry.registerDatasetType(pvi_dataset_type)
3652 collection = "datasets"
3653 registry.registerRun(collection)
3654 (pvi1,) = registry.insertDatasets(
3655 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3656 )
3657 (pvi2,) = registry.insertDatasets(
3658 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3659 )
3660 (pvi3,) = registry.insertDatasets(
3661 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3662 )
3663 self.assertEqual(
3664 set(
3665 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3666 .expanded()
3667 .findRelatedDatasets("pvi", [collection])
3668 ),
3669 {
3670 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3671 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3672 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3673 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3674 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3675 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3676 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3677 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3678 },
3679 )