Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%
1520 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-01 11:00 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
31__all__ = ["RegistryTests"]
33import datetime
34import itertools
35import logging
36import os
37import re
38import unittest
39import uuid
40from abc import ABC, abstractmethod
41from collections import defaultdict, namedtuple
42from collections.abc import Iterator
43from datetime import timedelta
44from typing import TYPE_CHECKING
46import astropy.time
47import sqlalchemy
49try:
50 import numpy as np
51except ImportError:
52 np = None
54import lsst.sphgeom
55from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql
57from ..._dataset_association import DatasetAssociation
58from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
59from ..._dataset_type import DatasetType
60from ..._named import NamedValueSet
61from ..._storage_class import StorageClass
62from ..._timespan import Timespan
63from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
64from .._collection_summary import CollectionSummary
65from .._collection_type import CollectionType
66from .._config import RegistryConfig
67from .._exceptions import (
68 ArgumentError,
69 CollectionError,
70 CollectionTypeError,
71 ConflictingDefinitionError,
72 DataIdValueError,
73 DatasetTypeError,
74 InconsistentDataIdError,
75 MissingCollectionError,
76 MissingDatasetTypeError,
77 NoDefaultCollectionError,
78 OrphanedRecordError,
79)
80from ..interfaces import ButlerAttributeExistsError
82if TYPE_CHECKING:
83 from ..sql_registry import SqlRegistry
86class RegistryTests(ABC):
87 """Generic tests for the `SqlRegistry` class that can be subclassed to
88 generate tests for different configurations.
89 """
91 collectionsManager: str | None = None
92 """Name of the collections manager class, if subclass provides value for
93 this member then it overrides name specified in default configuration
94 (`str`).
95 """
97 datasetsManager: str | dict[str, str] | None = None
98 """Name or configuration dictionary of the datasets manager class, if
99 subclass provides value for this member then it overrides name specified
100 in default configuration (`str` or `dict`).
101 """
103 @classmethod
104 @abstractmethod
105 def getDataDir(cls) -> str:
106 """Return the root directory containing test data YAML files."""
107 raise NotImplementedError()
109 def makeRegistryConfig(self) -> RegistryConfig:
110 """Create RegistryConfig used to create a registry.
112 This method should be called by a subclass from `makeRegistry`.
113 Returned instance will be pre-configured based on the values of class
114 members, and default-configured for all other parameters. Subclasses
115 that need default configuration should just instantiate
116 `RegistryConfig` directly.
117 """
118 config = RegistryConfig()
119 if self.collectionsManager:
120 config["managers", "collections"] = self.collectionsManager
121 if self.datasetsManager:
122 config["managers", "datasets"] = self.datasetsManager
123 return config
125 @abstractmethod
126 def makeRegistry(self, share_repo_with: SqlRegistry | None = None) -> SqlRegistry | None:
127 """Return the SqlRegistry instance to be tested.
129 Parameters
130 ----------
131 share_repo_with : `SqlRegistry`, optional
132 If provided, the new registry should point to the same data
133 repository as this existing registry.
135 Returns
136 -------
137 registry : `SqlRegistry`
138 New `SqlRegistry` instance, or `None` *only* if `share_repo_with`
139 is not `None` and this test case does not support that argument
140 (e.g. it is impossible with in-memory SQLite DBs).
141 """
142 raise NotImplementedError()
144 def loadData(self, registry: SqlRegistry, filename: str):
145 """Load registry test data from ``getDataDir/<filename>``,
146 which should be a YAML import/export file.
147 """
148 from ...transfers import YamlRepoImportBackend
150 with open(os.path.join(self.getDataDir(), filename)) as stream:
151 backend = YamlRepoImportBackend(stream, registry)
152 backend.register()
153 backend.load(datastore=None)
155 def checkQueryResults(self, results, expected):
156 """Check that a query results object contains expected values.
158 Parameters
159 ----------
160 results : `DataCoordinateQueryResults` or `DatasetQueryResults`
161 A lazy-evaluation query results object.
162 expected : `list`
163 A list of `DataCoordinate` o `DatasetRef` objects that should be
164 equal to results of the query, aside from ordering.
165 """
166 self.assertCountEqual(list(results), expected)
167 self.assertEqual(results.count(), len(expected))
168 if expected:
169 self.assertTrue(results.any())
170 else:
171 self.assertFalse(results.any())
173 def testOpaque(self):
174 """Tests for `SqlRegistry.registerOpaqueTable`,
175 `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
176 `SqlRegistry.deleteOpaqueData`.
177 """
178 registry = self.makeRegistry()
179 table = "opaque_table_for_testing"
180 registry.registerOpaqueTable(
181 table,
182 spec=ddl.TableSpec(
183 fields=[
184 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
185 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
186 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
187 ],
188 ),
189 )
190 rows = [
191 {"id": 1, "name": "one", "count": None},
192 {"id": 2, "name": "two", "count": 5},
193 {"id": 3, "name": "three", "count": 6},
194 ]
195 registry.insertOpaqueData(table, *rows)
196 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
197 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
198 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
199 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
200 self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
201 # Test very long IN clause which exceeds sqlite limit on number of
202 # parameters. SQLite says the limit is 32k but it looks like it is
203 # much higher.
204 self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
205 # Two IN clauses, each longer than 1k batch size, first with
206 # duplicates, second has matching elements in different batches (after
207 # sorting).
208 self.assertEqual(
209 rows[0:2],
210 list(
211 registry.fetchOpaqueData(
212 table,
213 id=list(range(1000)) + list(range(100, 0, -1)),
214 name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
215 )
216 ),
217 )
218 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
219 registry.deleteOpaqueData(table, id=3)
220 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
221 registry.deleteOpaqueData(table)
222 self.assertEqual([], list(registry.fetchOpaqueData(table)))
224 def testDatasetType(self):
225 """Tests for `SqlRegistry.registerDatasetType` and
226 `SqlRegistry.getDatasetType`.
227 """
228 registry = self.makeRegistry()
229 # Check valid insert
230 datasetTypeName = "test"
231 storageClass = StorageClass("testDatasetType")
232 registry.storageClasses.registerStorageClass(storageClass)
233 dimensions = registry.dimensions.conform(("instrument", "visit"))
234 differentDimensions = registry.dimensions.conform(("instrument", "patch"))
235 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
236 # Inserting for the first time should return True
237 self.assertTrue(registry.registerDatasetType(inDatasetType))
238 outDatasetType1 = registry.getDatasetType(datasetTypeName)
239 self.assertEqual(outDatasetType1, inDatasetType)
241 # Re-inserting should work
242 self.assertFalse(registry.registerDatasetType(inDatasetType))
243 # Except when they are not identical
244 with self.assertRaises(ConflictingDefinitionError):
245 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
246 registry.registerDatasetType(nonIdenticalDatasetType)
248 # Template can be None
249 datasetTypeName = "testNoneTemplate"
250 storageClass = StorageClass("testDatasetType2")
251 registry.storageClasses.registerStorageClass(storageClass)
252 dimensions = registry.dimensions.conform(("instrument", "visit"))
253 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
254 registry.registerDatasetType(inDatasetType)
255 outDatasetType2 = registry.getDatasetType(datasetTypeName)
256 self.assertEqual(outDatasetType2, inDatasetType)
258 allTypes = set(registry.queryDatasetTypes())
259 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
261 def testDimensions(self):
262 """Tests for `SqlRegistry.insertDimensionData`,
263 `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
264 """
265 registry = self.makeRegistry()
266 dimensionName = "instrument"
267 dimension = registry.dimensions[dimensionName]
268 dimensionValue = {
269 "name": "DummyCam",
270 "visit_max": 10,
271 "visit_system": 0,
272 "exposure_max": 10,
273 "detector_max": 2,
274 "class_name": "lsst.pipe.base.Instrument",
275 }
276 registry.insertDimensionData(dimensionName, dimensionValue)
277 # Inserting the same value twice should fail
278 with self.assertRaises(sqlalchemy.exc.IntegrityError):
279 registry.insertDimensionData(dimensionName, dimensionValue)
280 # expandDataId should retrieve the record we just inserted
281 self.assertEqual(
282 registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
283 .records[dimensionName]
284 .toDict(),
285 dimensionValue,
286 )
287 # expandDataId should raise if there is no record with the given ID.
288 with self.assertRaises(DataIdValueError):
289 registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
290 # band doesn't have a table; insert should fail.
291 with self.assertRaises(TypeError):
292 registry.insertDimensionData("band", {"band": "i"})
293 dimensionName2 = "physical_filter"
294 dimension2 = registry.dimensions[dimensionName2]
295 dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
296 # Missing required dependency ("instrument") should fail
297 with self.assertRaises(KeyError):
298 registry.insertDimensionData(dimensionName2, dimensionValue2)
299 # Adding required dependency should fix the failure
300 dimensionValue2["instrument"] = "DummyCam"
301 registry.insertDimensionData(dimensionName2, dimensionValue2)
302 # expandDataId should retrieve the record we just inserted.
303 self.assertEqual(
304 registry.expandDataId(
305 instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
306 )
307 .records[dimensionName2]
308 .toDict(),
309 dimensionValue2,
310 )
311 # Use syncDimensionData to insert a new record successfully.
312 dimensionName3 = "detector"
313 dimensionValue3 = {
314 "instrument": "DummyCam",
315 "id": 1,
316 "full_name": "one",
317 "name_in_raft": "zero",
318 "purpose": "SCIENCE",
319 }
320 self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
321 # Sync that again. Note that one field ("raft") is NULL, and that
322 # should be okay.
323 self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
324 # Now try that sync with the same primary key but a different value.
325 # This should fail.
326 with self.assertRaises(ConflictingDefinitionError):
327 registry.syncDimensionData(
328 dimensionName3,
329 {
330 "instrument": "DummyCam",
331 "id": 1,
332 "full_name": "one",
333 "name_in_raft": "four",
334 "purpose": "SCIENCE",
335 },
336 )
338 @unittest.skipIf(np is None, "numpy not available.")
339 def testNumpyDataId(self):
340 """Test that we can use a numpy int in a dataId."""
341 registry = self.makeRegistry()
342 dimensionEntries = [
343 ("instrument", {"instrument": "DummyCam"}),
344 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
345 # Using an np.int64 here fails unless Records.fromDict is also
346 # patched to look for numbers.Integral
347 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
348 ]
349 for args in dimensionEntries:
350 registry.insertDimensionData(*args)
352 # Try a normal integer and something that looks like an int but
353 # is not.
354 for visit_id in (42, np.int64(42)):
355 with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
356 expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
357 self.assertEqual(expanded["visit"], int(visit_id))
358 self.assertIsInstance(expanded["visit"], int)
360 def testDataIdRelationships(self):
361 """Test that `SqlRegistry.expandDataId` raises an exception when the
362 given keys are inconsistent.
363 """
364 registry = self.makeRegistry()
365 self.loadData(registry, "base.yaml")
366 # Insert a few more dimension records for the next test.
367 registry.insertDimensionData(
368 "exposure",
369 {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
370 )
371 registry.insertDimensionData(
372 "exposure",
373 {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
374 )
375 registry.insertDimensionData(
376 "visit_system",
377 {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
378 )
379 registry.insertDimensionData(
380 "visit",
381 {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
382 )
383 registry.insertDimensionData(
384 "visit_definition",
385 {"instrument": "Cam1", "visit": 1, "exposure": 1},
386 )
387 with self.assertRaises(InconsistentDataIdError):
388 registry.expandDataId(
389 {"instrument": "Cam1", "visit": 1, "exposure": 2},
390 )
392 def testDataset(self):
393 """Basic tests for `SqlRegistry.insertDatasets`,
394 `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
395 """
396 registry = self.makeRegistry()
397 self.loadData(registry, "base.yaml")
398 run = "tésτ"
399 registry.registerRun(run)
400 datasetType = registry.getDatasetType("bias")
401 dataId = {"instrument": "Cam1", "detector": 2}
402 (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
403 outRef = registry.getDataset(ref.id)
404 self.assertIsNotNone(ref.id)
405 self.assertEqual(ref, outRef)
406 with self.assertRaises(ConflictingDefinitionError):
407 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
408 registry.removeDatasets([ref])
409 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
411 def testFindDataset(self):
412 """Tests for `SqlRegistry.findDataset`."""
413 registry = self.makeRegistry()
414 self.loadData(registry, "base.yaml")
415 run = "tésτ"
416 datasetType = registry.getDatasetType("bias")
417 dataId = {"instrument": "Cam1", "detector": 4}
418 registry.registerRun(run)
419 (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
420 outputRef = registry.findDataset(datasetType, dataId, collections=[run])
421 self.assertEqual(outputRef, inputRef)
422 # Check that retrieval with invalid dataId raises
423 with self.assertRaises(LookupError):
424 dataId = {"instrument": "Cam1"} # no detector
425 registry.findDataset(datasetType, dataId, collections=run)
426 # Check that different dataIds match to different datasets
427 dataId1 = {"instrument": "Cam1", "detector": 1}
428 (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
429 dataId2 = {"instrument": "Cam1", "detector": 2}
430 (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
431 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
432 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
433 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
434 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
435 # Check that requesting a non-existing dataId returns None
436 nonExistingDataId = {"instrument": "Cam1", "detector": 3}
437 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
438 # Search more than one collection, in which two have the right
439 # dataset type and another does not.
440 registry.registerRun("empty")
441 self.loadData(registry, "datasets-uuid.yaml")
442 bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
443 self.assertIsNotNone(bias1)
444 bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
445 self.assertIsNotNone(bias2)
446 self.assertEqual(
447 bias1,
448 registry.findDataset(
449 "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
450 ),
451 )
452 self.assertEqual(
453 bias2,
454 registry.findDataset(
455 "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
456 ),
457 )
458 # Search more than one collection, with one of them a CALIBRATION
459 # collection.
460 registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
461 timespan = Timespan(
462 begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
463 end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
464 )
465 registry.certify("Cam1/calib", [bias2], timespan=timespan)
466 self.assertEqual(
467 bias1,
468 registry.findDataset(
469 "bias",
470 instrument="Cam1",
471 detector=2,
472 collections=["empty", "imported_g", "Cam1/calib"],
473 timespan=timespan,
474 ),
475 )
476 self.assertEqual(
477 bias2,
478 registry.findDataset(
479 "bias",
480 instrument="Cam1",
481 detector=2,
482 collections=["empty", "Cam1/calib", "imported_g"],
483 timespan=timespan,
484 ),
485 )
486 # If we try to search those same collections without a timespan, it
487 # should still work, since the CALIBRATION collection is ignored.
488 self.assertEqual(
489 bias1,
490 registry.findDataset(
491 "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
492 ),
493 )
494 self.assertEqual(
495 bias1,
496 registry.findDataset(
497 "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
498 ),
499 )
501 def testRemoveDatasetTypeSuccess(self):
502 """Test that SqlRegistry.removeDatasetType works when there are no
503 datasets of that type present.
504 """
505 registry = self.makeRegistry()
506 self.loadData(registry, "base.yaml")
507 registry.removeDatasetType("flat")
508 with self.assertRaises(MissingDatasetTypeError):
509 registry.getDatasetType("flat")
511 def testRemoveDatasetTypeFailure(self):
512 """Test that SqlRegistry.removeDatasetType raises when there are
513 datasets of that type present or if the dataset type is for a
514 component.
515 """
516 registry = self.makeRegistry()
517 self.loadData(registry, "base.yaml")
518 self.loadData(registry, "datasets.yaml")
519 with self.assertRaises(OrphanedRecordError):
520 registry.removeDatasetType("flat")
521 with self.assertRaises(ValueError):
522 registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
524 def testImportDatasetsUUID(self):
525 """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
526 if isinstance(self.datasetsManager, str):
527 if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
528 self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
529 elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
530 ".ByDimensionsDatasetRecordStorageManagerUUID"
531 ):
532 self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")
534 registry = self.makeRegistry()
535 self.loadData(registry, "base.yaml")
536 for run in range(6):
537 registry.registerRun(f"run{run}")
538 datasetTypeBias = registry.getDatasetType("bias")
539 datasetTypeFlat = registry.getDatasetType("flat")
540 dataIdBias1 = {"instrument": "Cam1", "detector": 1}
541 dataIdBias2 = {"instrument": "Cam1", "detector": 2}
542 dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
544 ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
545 (ref1,) = registry._importDatasets([ref])
546 # UUID is used without change
547 self.assertEqual(ref.id, ref1.id)
549 # All different failure modes
550 refs = (
551 # Importing same DatasetRef with different dataset ID is an error
552 DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
553 # Same DatasetId but different DataId
554 DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
555 DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
556 # Same DatasetRef and DatasetId but different run
557 DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
558 )
559 for ref in refs:
560 with self.assertRaises(ConflictingDefinitionError):
561 registry._importDatasets([ref])
563 # Test for non-unique IDs, they can be re-imported multiple times.
564 for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
565 with self.subTest(idGenMode=idGenMode):
566 # Make dataset ref with reproducible dataset ID.
567 ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
568 (ref1,) = registry._importDatasets([ref])
569 self.assertIsInstance(ref1.id, uuid.UUID)
570 self.assertEqual(ref1.id.version, 5)
571 self.assertEqual(ref1.id, ref.id)
573 # Importing it again is OK
574 (ref2,) = registry._importDatasets([ref1])
575 self.assertEqual(ref2.id, ref1.id)
577 # Cannot import to different run with the same ID
578 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
579 with self.assertRaises(ConflictingDefinitionError):
580 registry._importDatasets([ref])
582 ref = DatasetRef(
583 datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
584 )
585 if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
586 # Cannot import same DATAID_TYPE ref into a new run
587 with self.assertRaises(ConflictingDefinitionError):
588 (ref2,) = registry._importDatasets([ref])
589 else:
590 # DATAID_TYPE_RUN ref can be imported into a new run
591 (ref2,) = registry._importDatasets([ref])
593 def testDatasetTypeComponentQueries(self):
594 """Test component options when querying for dataset types.
596 All of the behavior here is deprecated, so many of these tests are
597 currently wrapped in a context to check that we get a warning whenever
598 a component dataset is actually returned.
599 """
600 registry = self.makeRegistry()
601 self.loadData(registry, "base.yaml")
602 self.loadData(registry, "datasets.yaml")
603 # Test querying for dataset types with different inputs.
604 # First query for all dataset types; components should only be included
605 # when components=True.
606 self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
607 self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
608 with self.assertWarns(FutureWarning):
609 self.assertLess(
610 {"bias", "flat", "bias.wcs", "flat.photoCalib"},
611 NamedValueSet(registry.queryDatasetTypes(components=True)).names,
612 )
613 # Use a pattern that can match either parent or components. Again,
614 # components are only returned if components=True.
615 self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
616 self.assertEqual(
617 {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
618 )
619 with self.assertWarns(FutureWarning):
620 self.assertLess(
621 {"bias", "bias.wcs"},
622 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
623 )
624 # This pattern matches only a component. In this case we also return
625 # that component dataset type if components=None.
626 with self.assertWarns(FutureWarning):
627 self.assertEqual(
628 {"bias.wcs"},
629 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
630 )
631 self.assertEqual(
632 set(),
633 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
634 )
635 with self.assertWarns(FutureWarning):
636 self.assertEqual(
637 {"bias.wcs"},
638 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
639 )
640 # Add a dataset type using a StorageClass that we'll then remove; check
641 # that this does not affect our ability to query for dataset types
642 # (though it will warn).
643 tempStorageClass = StorageClass(
644 name="TempStorageClass",
645 components={
646 "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
647 "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
648 },
649 )
650 registry.storageClasses.registerStorageClass(tempStorageClass)
651 datasetType = DatasetType(
652 "temporary",
653 dimensions=["instrument"],
654 storageClass=tempStorageClass,
655 universe=registry.dimensions,
656 )
657 registry.registerDatasetType(datasetType)
658 registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
659 datasetType._storageClass = None
660 del tempStorageClass
661 # Querying for all dataset types, including components, should include
662 # at least all non-component dataset types (and I don't want to
663 # enumerate all of the Exposure components for bias and flat here).
664 with self.assertWarns(FutureWarning):
665 with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
666 everything = NamedValueSet(registry.queryDatasetTypes(components=True))
667 self.assertIn("TempStorageClass", cm.output[0])
668 self.assertLess({"bias", "flat", "temporary"}, everything.names)
669 # It should not include "temporary.columns", because we tried to remove
670 # the storage class that would tell it about that. So if the next line
671 # fails (i.e. "temporary.columns" _is_ in everything.names), it means
672 # this part of the test isn't doing anything, because the _unregister
673 # call about isn't simulating the real-life case we want it to
674 # simulate, in which different versions of daf_butler in entirely
675 # different Python processes interact with the same repo.
676 self.assertNotIn("temporary.data", everything.names)
677 # Query for dataset types that start with "temp". This should again
678 # not include the component, and also not fail.
679 with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
680 startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
681 self.assertIn("TempStorageClass", cm.output[0])
682 self.assertEqual({"temporary"}, startsWithTemp.names)
683 # Querying with no components should not warn at all.
684 with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
685 startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
686 # Must issue a warning of our own to be captured.
687 logging.getLogger("lsst.daf.butler.registries").warning("test message")
688 self.assertEqual(len(cm.output), 1)
689 self.assertIn("test message", cm.output[0])
691 def testComponentLookups(self):
692 """Test searching for component datasets via their parents.
694 All of the behavior here is deprecated, so many of these tests are
695 currently wrapped in a context to check that we get a warning whenever
696 a component dataset is actually returned.
697 """
698 registry = self.makeRegistry()
699 self.loadData(registry, "base.yaml")
700 self.loadData(registry, "datasets.yaml")
701 # Test getting the child dataset type (which does still exist in the
702 # Registry), and check for consistency with
703 # DatasetRef.makeComponentRef.
704 collection = "imported_g"
705 parentType = registry.getDatasetType("bias")
706 childType = registry.getDatasetType("bias.wcs")
707 parentRefResolved = registry.findDataset(
708 parentType, collections=collection, instrument="Cam1", detector=1
709 )
710 self.assertIsInstance(parentRefResolved, DatasetRef)
711 self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
712 # Search for a single dataset with findDataset.
713 childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
714 self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
715 # Search for detector data IDs constrained by component dataset
716 # existence with queryDataIds.
717 with self.assertWarns(FutureWarning):
718 dataIds = registry.queryDataIds(
719 ["detector"],
720 datasets=["bias.wcs"],
721 collections=collection,
722 ).toSet()
723 self.assertEqual(
724 dataIds,
725 DataCoordinateSet(
726 {
727 DataCoordinate.standardize(
728 instrument="Cam1", detector=d, dimensions=parentType.dimensions
729 )
730 for d in (1, 2, 3)
731 },
732 dimensions=parentType.dimensions,
733 ),
734 )
735 # Search for multiple datasets of a single type with queryDatasets.
736 with self.assertWarns(FutureWarning):
737 childRefs2 = set(
738 registry.queryDatasets(
739 "bias.wcs",
740 collections=collection,
741 )
742 )
743 self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
744 self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))
746 def testCollections(self):
747 """Tests for registry methods that manage collections."""
748 registry = self.makeRegistry()
749 other_registry = self.makeRegistry(share_repo_with=registry)
750 self.loadData(registry, "base.yaml")
751 self.loadData(registry, "datasets.yaml")
752 run1 = "imported_g"
753 run2 = "imported_r"
754 # Test setting a collection docstring after it has been created.
755 registry.setCollectionDocumentation(run1, "doc for run1")
756 self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
757 registry.setCollectionDocumentation(run1, None)
758 self.assertIsNone(registry.getCollectionDocumentation(run1))
759 datasetType = "bias"
760 # Find some datasets via their run's collection.
761 dataId1 = {"instrument": "Cam1", "detector": 1}
762 ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
763 self.assertIsNotNone(ref1)
764 dataId2 = {"instrument": "Cam1", "detector": 2}
765 ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
766 self.assertIsNotNone(ref2)
767 # Associate those into a new collection, then look for them there.
768 tag1 = "tag1"
769 registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
770 # Check that we can query for old and new collections by type.
771 self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
772 self.assertEqual(
773 set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
774 {tag1, run1, run2},
775 )
776 self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
777 registry.associate(tag1, [ref1, ref2])
778 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
779 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
780 # Disassociate one and verify that we can't it there anymore...
781 registry.disassociate(tag1, [ref1])
782 self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
783 # ...but we can still find ref2 in tag1, and ref1 in the run.
784 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
785 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
786 collections = set(registry.queryCollections())
787 self.assertEqual(collections, {run1, run2, tag1})
788 # Associate both refs into tag1 again; ref2 is already there, but that
789 # should be a harmless no-op.
790 registry.associate(tag1, [ref1, ref2])
791 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
792 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
793 # Get a different dataset (from a different run) that has the same
794 # dataset type and data ID as ref2.
795 ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
796 self.assertNotEqual(ref2, ref2b)
797 # Attempting to associate that into tag1 should be an error.
798 with self.assertRaises(ConflictingDefinitionError):
799 registry.associate(tag1, [ref2b])
800 # That error shouldn't have messed up what we had before.
801 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
802 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
803 # Attempt to associate the conflicting dataset again, this time with
804 # a dataset that isn't in the collection and won't cause a conflict.
805 # Should also fail without modifying anything.
806 dataId3 = {"instrument": "Cam1", "detector": 3}
807 ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
808 with self.assertRaises(ConflictingDefinitionError):
809 registry.associate(tag1, [ref3, ref2b])
810 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
811 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
812 self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
813 # Register a chained collection that searches [tag1, run2]
814 chain1 = "chain1"
815 registry.registerCollection(chain1, type=CollectionType.CHAINED)
816 self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
817 # Chained collection exists, but has no collections in it.
818 self.assertFalse(registry.getCollectionChain(chain1))
819 # If we query for all collections, we should get the chained collection
820 # only if we don't ask to flatten it (i.e. yield only its children).
821 self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
822 self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
823 # Attempt to set its child collections to something circular; that
824 # should fail.
825 with self.assertRaises(ValueError):
826 registry.setCollectionChain(chain1, [tag1, chain1])
827 # Add the child collections.
828 registry.setCollectionChain(chain1, [tag1, run2])
829 self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
830 self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
831 self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
832 # Refresh the other registry that points to the same repo, and make
833 # sure it can see the things we've done (note that this does require
834 # an explicit refresh(); that's the documented behavior, because
835 # caching is ~impossible otherwise).
836 if other_registry is not None:
837 other_registry.refresh()
838 self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
839 self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
840 self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
841 # Searching for dataId1 or dataId2 in the chain should return ref1 and
842 # ref2, because both are in tag1.
843 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
844 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
845 # Now disassociate ref2 from tag1. The search (for bias) with
846 # dataId2 in chain1 should then:
847 # 1. not find it in tag1
848 # 2. find a different dataset in run2
849 registry.disassociate(tag1, [ref2])
850 ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
851 self.assertNotEqual(ref2b, ref2)
852 self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
853 # Define a new chain so we can test recursive chains.
854 chain2 = "chain2"
855 registry.registerCollection(chain2, type=CollectionType.CHAINED)
856 registry.setCollectionChain(chain2, [run2, chain1])
857 self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
858 self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
859 # Query for collections matching a regex.
860 self.assertCountEqual(
861 list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
862 ["imported_r", "imported_g"],
863 )
864 # Query for collections matching a regex or an explicit str.
865 self.assertCountEqual(
866 list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
867 ["imported_r", "imported_g", "chain1"],
868 )
869 # Search for bias with dataId1 should find it via tag1 in chain2,
870 # recursing, because is not in run1.
871 self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
872 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
873 # Search for bias with dataId2 should find it in run2 (ref2b).
874 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
875 # Search for a flat that is in run2. That should not be found
876 # at the front of chain2, because of the restriction to bias
877 # on run2 there, but it should be found in at the end of chain1.
878 dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
879 ref4 = registry.findDataset("flat", dataId4, collections=run2)
880 self.assertIsNotNone(ref4)
881 self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
882 # Deleting a collection that's part of a CHAINED collection is not
883 # allowed, and is exception-safe.
884 with self.assertRaises(sqlalchemy.exc.IntegrityError):
885 registry.removeCollection(run2)
886 self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
887 with self.assertRaises(sqlalchemy.exc.IntegrityError):
888 registry.removeCollection(chain1)
889 self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
890 # Actually remove chain2, test that it's gone by asking for its type.
891 registry.removeCollection(chain2)
892 with self.assertRaises(MissingCollectionError):
893 registry.getCollectionType(chain2)
894 # Actually remove run2 and chain1, which should work now.
895 registry.removeCollection(chain1)
896 registry.removeCollection(run2)
897 with self.assertRaises(MissingCollectionError):
898 registry.getCollectionType(run2)
899 with self.assertRaises(MissingCollectionError):
900 registry.getCollectionType(chain1)
901 # Remove tag1 as well, just to test that we can remove TAGGED
902 # collections.
903 registry.removeCollection(tag1)
904 with self.assertRaises(MissingCollectionError):
905 registry.getCollectionType(tag1)
907 def testCollectionChainFlatten(self):
908 """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
909 option.
910 """
911 registry = self.makeRegistry()
912 registry.registerCollection("inner", CollectionType.CHAINED)
913 registry.registerCollection("innermost", CollectionType.RUN)
914 registry.setCollectionChain("inner", ["innermost"])
915 registry.registerCollection("outer", CollectionType.CHAINED)
916 registry.setCollectionChain("outer", ["inner"], flatten=False)
917 self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
918 registry.setCollectionChain("outer", ["inner"], flatten=True)
919 self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
921 def testBasicTransaction(self):
922 """Test that all operations within a single transaction block are
923 rolled back if an exception propagates out of the block.
924 """
925 registry = self.makeRegistry()
926 storageClass = StorageClass("testDatasetType")
927 registry.storageClasses.registerStorageClass(storageClass)
928 with registry.transaction():
929 registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
930 with self.assertRaises(ValueError):
931 with registry.transaction():
932 registry.insertDimensionData("instrument", {"name": "Cam2"})
933 raise ValueError("Oops, something went wrong")
934 # Cam1 should exist
935 self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
936 # But Cam2 and Cam3 should both not exist
937 with self.assertRaises(DataIdValueError):
938 registry.expandDataId(instrument="Cam2")
939 with self.assertRaises(DataIdValueError):
940 registry.expandDataId(instrument="Cam3")
942 def testNestedTransaction(self):
943 """Test that operations within a transaction block are not rolled back
944 if an exception propagates out of an inner transaction block and is
945 then caught.
946 """
947 registry = self.makeRegistry()
948 dimension = registry.dimensions["instrument"]
949 dataId1 = {"instrument": "DummyCam"}
950 dataId2 = {"instrument": "DummyCam2"}
951 checkpointReached = False
952 with registry.transaction():
953 # This should be added and (ultimately) committed.
954 registry.insertDimensionData(dimension, dataId1)
955 with self.assertRaises(sqlalchemy.exc.IntegrityError):
956 with registry.transaction(savepoint=True):
957 # This does not conflict, and should succeed (but not
958 # be committed).
959 registry.insertDimensionData(dimension, dataId2)
960 checkpointReached = True
961 # This should conflict and raise, triggerring a rollback
962 # of the previous insertion within the same transaction
963 # context, but not the original insertion in the outer
964 # block.
965 registry.insertDimensionData(dimension, dataId1)
966 self.assertTrue(checkpointReached)
967 self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
968 with self.assertRaises(DataIdValueError):
969 registry.expandDataId(dataId2, dimensions=dimension.minimal_group)
971 def testInstrumentDimensions(self):
972 """Test queries involving only instrument dimensions, with no joins to
973 skymap.
974 """
975 registry = self.makeRegistry()
977 # need a bunch of dimensions and datasets for test
978 registry.insertDimensionData(
979 "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
980 )
981 registry.insertDimensionData(
982 "physical_filter",
983 dict(instrument="DummyCam", name="dummy_r", band="r"),
984 dict(instrument="DummyCam", name="dummy_i", band="i"),
985 )
986 registry.insertDimensionData(
987 "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
988 )
989 registry.insertDimensionData(
990 "visit",
991 dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
992 dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
993 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
994 )
995 for i in range(1, 6):
996 registry.insertDimensionData(
997 "visit_detector_region",
998 dict(instrument="DummyCam", visit=10, detector=i),
999 dict(instrument="DummyCam", visit=11, detector=i),
1000 dict(instrument="DummyCam", visit=20, detector=i),
1001 )
1002 registry.insertDimensionData(
1003 "exposure",
1004 dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
1005 dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
1006 dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
1007 dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
1008 dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
1009 dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
1010 )
1011 registry.insertDimensionData(
1012 "visit_definition",
1013 dict(instrument="DummyCam", exposure=100, visit=10),
1014 dict(instrument="DummyCam", exposure=101, visit=10),
1015 dict(instrument="DummyCam", exposure=110, visit=11),
1016 dict(instrument="DummyCam", exposure=111, visit=11),
1017 dict(instrument="DummyCam", exposure=200, visit=20),
1018 dict(instrument="DummyCam", exposure=201, visit=20),
1019 )
1020 # dataset types
1021 run1 = "test1_r"
1022 run2 = "test2_r"
1023 tagged2 = "test2_t"
1024 registry.registerRun(run1)
1025 registry.registerRun(run2)
1026 registry.registerCollection(tagged2)
1027 storageClass = StorageClass("testDataset")
1028 registry.storageClasses.registerStorageClass(storageClass)
1029 rawType = DatasetType(
1030 name="RAW",
1031 dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
1032 storageClass=storageClass,
1033 )
1034 registry.registerDatasetType(rawType)
1035 calexpType = DatasetType(
1036 name="CALEXP",
1037 dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
1038 storageClass=storageClass,
1039 )
1040 registry.registerDatasetType(calexpType)
1042 # add pre-existing datasets
1043 for exposure in (100, 101, 110, 111):
1044 for detector in (1, 2, 3):
1045 # note that only 3 of 5 detectors have datasets
1046 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
1047 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
1048 # exposures 100 and 101 appear in both run1 and tagged2.
1049 # 100 has different datasets in the different collections
1050 # 101 has the same dataset in both collections.
1051 if exposure == 100:
1052 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
1053 if exposure in (100, 101):
1054 registry.associate(tagged2, [ref])
1055 # Add pre-existing datasets to tagged2.
1056 for exposure in (200, 201):
1057 for detector in (3, 4, 5):
1058 # note that only 3 of 5 detectors have datasets
1059 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
1060 (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
1061 registry.associate(tagged2, [ref])
1063 dimensions = registry.dimensions.conform(
1064 rawType.dimensions.required.names | calexpType.dimensions.required.names
1065 )
1066 # Test that single dim string works as well as list of str
1067 rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
1068 rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
1069 self.assertEqual(rows, rowsI)
1070 # with empty expression
1071 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
1072 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors
1073 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
1074 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
1075 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))
1077 # second collection
1078 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
1079 self.assertEqual(len(rows), 4 * 3) # 4 exposures times 3 detectors
1080 for dataId in rows:
1081 self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
1082 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
1083 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
1084 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))
1086 # with two input datasets
1087 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
1088 self.assertEqual(len(set(rows)), 6 * 3) # 6 exposures times 3 detectors; set needed to de-dupe
1089 for dataId in rows:
1090 self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
1091 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
1092 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
1093 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))
1095 # limit to single visit
1096 rows = registry.queryDataIds(
1097 dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
1098 ).toSet()
1099 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors
1100 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
1101 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
1102 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))
1104 # more limiting expression, using link names instead of Table.column
1105 rows = registry.queryDataIds(
1106 dimensions,
1107 datasets=rawType,
1108 collections=run1,
1109 where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
1110 ).toSet()
1111 self.assertEqual(len(rows), 2 * 2) # 2 exposures times 2 detectors
1112 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
1113 self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
1114 self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))
1116 # queryDataIds with only one of `datasets` and `collections` is an
1117 # error.
1118 with self.assertRaises(CollectionError):
1119 registry.queryDataIds(dimensions, datasets=rawType)
1120 with self.assertRaises(ArgumentError):
1121 registry.queryDataIds(dimensions, collections=run1)
1123 # expression excludes everything
1124 rows = registry.queryDataIds(
1125 dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
1126 ).toSet()
1127 self.assertEqual(len(rows), 0)
1129 # Selecting by physical_filter, this is not in the dimensions, but it
1130 # is a part of the full expression so it should work too.
1131 rows = registry.queryDataIds(
1132 dimensions,
1133 datasets=rawType,
1134 collections=run1,
1135 where="physical_filter = 'dummy_r'",
1136 instrument="DummyCam",
1137 ).toSet()
1138 self.assertEqual(len(rows), 2 * 3) # 2 exposures times 3 detectors
1139 self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
1140 self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
1141 self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))
1143 def testSkyMapDimensions(self):
1144 """Tests involving only skymap dimensions, no joins to instrument."""
1145 registry = self.makeRegistry()
1147 # need a bunch of dimensions and datasets for test, we want
1148 # "band" in the test so also have to add physical_filter
1149 # dimensions
1150 registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
1151 registry.insertDimensionData(
1152 "physical_filter",
1153 dict(instrument="DummyCam", name="dummy_r", band="r"),
1154 dict(instrument="DummyCam", name="dummy_i", band="i"),
1155 )
1156 registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
1157 for tract in range(10):
1158 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
1159 registry.insertDimensionData(
1160 "patch",
1161 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
1162 )
1164 # dataset types
1165 run = "tésτ"
1166 registry.registerRun(run)
1167 storageClass = StorageClass("testDataset")
1168 registry.storageClasses.registerStorageClass(storageClass)
1169 calexpType = DatasetType(
1170 name="deepCoadd_calexp",
1171 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
1172 storageClass=storageClass,
1173 )
1174 registry.registerDatasetType(calexpType)
1175 mergeType = DatasetType(
1176 name="deepCoadd_mergeDet",
1177 dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
1178 storageClass=storageClass,
1179 )
1180 registry.registerDatasetType(mergeType)
1181 measType = DatasetType(
1182 name="deepCoadd_meas",
1183 dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
1184 storageClass=storageClass,
1185 )
1186 registry.registerDatasetType(measType)
1188 dimensions = registry.dimensions.conform(
1189 calexpType.dimensions.required.names
1190 | mergeType.dimensions.required.names
1191 | measType.dimensions.required.names
1192 )
1194 # add pre-existing datasets
1195 for tract in (1, 3, 5):
1196 for patch in (2, 4, 6, 7):
1197 dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
1198 registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
1199 for aFilter in ("i", "r"):
1200 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
1201 registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
1203 # with empty expression
1204 rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
1205 self.assertEqual(len(rows), 3 * 4 * 2) # 4 tracts x 4 patches x 2 filters
1206 for dataId in rows:
1207 self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
1208 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
1209 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
1210 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))
1212 # limit to 2 tracts and 2 patches
1213 rows = registry.queryDataIds(
1214 dimensions,
1215 datasets=[calexpType, mergeType],
1216 collections=run,
1217 where="tract IN (1, 5) AND patch IN (2, 7)",
1218 skymap="DummyMap",
1219 ).toSet()
1220 self.assertEqual(len(rows), 2 * 2 * 2) # 2 tracts x 2 patches x 2 filters
1221 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
1222 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
1223 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))
1225 # limit to single filter
1226 rows = registry.queryDataIds(
1227 dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
1228 ).toSet()
1229 self.assertEqual(len(rows), 3 * 4 * 1) # 4 tracts x 4 patches x 2 filters
1230 self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
1231 self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
1232 self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))
1234 # Specifying non-existing skymap is an exception
1235 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
1236 rows = registry.queryDataIds(
1237 dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
1238 ).toSet()
1240 def testSpatialJoin(self):
1241 """Test queries that involve spatial overlap joins."""
1242 registry = self.makeRegistry()
1243 self.loadData(registry, "hsc-rc2-subset.yaml")
1245 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1246 # the TopologicalFamily they belong to. We'll relate all elements in
1247 # each family to all of the elements in each other family.
1248 families = defaultdict(set)
1249 # Dictionary of {element.name: {dataId: region}}.
1250 regions = {}
1251 for element in registry.dimensions.database_elements:
1252 if element.spatial is not None:
1253 families[element.spatial.name].add(element)
1254 regions[element.name] = {
1255 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1256 }
1258 # If this check fails, it's not necessarily a problem - it may just be
1259 # a reasonable change to the default dimension definitions - but the
1260 # test below depends on there being more than one family to do anything
1261 # useful.
1262 self.assertEqual(len(families), 2)
1264 # Overlap DatabaseDimensionElements with each other.
1265 for family1, family2 in itertools.combinations(families, 2):
1266 for element1, element2 in itertools.product(families[family1], families[family2]):
1267 dimensions = element1.minimal_group | element2.minimal_group
1268 # Construct expected set of overlapping data IDs via a
1269 # brute-force comparison of the regions we've already fetched.
1270 expected = {
1271 DataCoordinate.standardize(
1272 {**dataId1.required, **dataId2.required}, dimensions=dimensions
1273 )
1274 for (dataId1, region1), (dataId2, region2) in itertools.product(
1275 regions[element1.name].items(), regions[element2.name].items()
1276 )
1277 if not region1.isDisjointFrom(region2)
1278 }
1279 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1280 queried = set(registry.queryDataIds(dimensions))
1281 self.assertEqual(expected, queried)
1283 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1284 commonSkyPix = registry.dimensions.commonSkyPix
1285 for elementName, these_regions in regions.items():
1286 dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
1287 expected = set()
1288 for dataId, region in these_regions.items():
1289 for begin, end in commonSkyPix.pixelization.envelope(region):
1290 expected.update(
1291 DataCoordinate.standardize(
1292 {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
1293 )
1294 for index in range(begin, end)
1295 )
1296 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1297 queried = set(registry.queryDataIds(dimensions))
1298 self.assertEqual(expected, queried)
1300 def testAbstractQuery(self):
1301 """Test that we can run a query that just lists the known
1302 bands. This is tricky because band is
1303 backed by a query against physical_filter.
1304 """
1305 registry = self.makeRegistry()
1306 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1307 registry.insertDimensionData(
1308 "physical_filter",
1309 dict(instrument="DummyCam", name="dummy_i", band="i"),
1310 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1311 dict(instrument="DummyCam", name="dummy_r", band="r"),
1312 )
1313 rows = registry.queryDataIds(["band"]).toSet()
1314 self.assertCountEqual(
1315 rows,
1316 [
1317 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1318 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1319 ],
1320 )
1322 def testAttributeManager(self):
1323 """Test basic functionality of attribute manager."""
1324 # number of attributes with schema versions in a fresh database,
1325 # 6 managers with 2 records per manager, plus config for dimensions
1326 VERSION_COUNT = 6 * 2 + 1
1328 registry = self.makeRegistry()
1329 attributes = registry._managers.attributes
1331 # check what get() returns for non-existing key
1332 self.assertIsNone(attributes.get("attr"))
1333 self.assertEqual(attributes.get("attr", ""), "")
1334 self.assertEqual(attributes.get("attr", "Value"), "Value")
1335 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1337 # cannot store empty key or value
1338 with self.assertRaises(ValueError):
1339 attributes.set("", "value")
1340 with self.assertRaises(ValueError):
1341 attributes.set("attr", "")
1343 # set value of non-existing key
1344 attributes.set("attr", "value")
1345 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1346 self.assertEqual(attributes.get("attr"), "value")
1348 # update value of existing key
1349 with self.assertRaises(ButlerAttributeExistsError):
1350 attributes.set("attr", "value2")
1352 attributes.set("attr", "value2", force=True)
1353 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1354 self.assertEqual(attributes.get("attr"), "value2")
1356 # delete existing key
1357 self.assertTrue(attributes.delete("attr"))
1358 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1360 # delete non-existing key
1361 self.assertFalse(attributes.delete("non-attr"))
1363 # store bunch of keys and get the list back
1364 data = [
1365 ("version.core", "1.2.3"),
1366 ("version.dimensions", "3.2.1"),
1367 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1368 ]
1369 for key, value in data:
1370 attributes.set(key, value)
1371 items = dict(attributes.items())
1372 for key, value in data:
1373 self.assertEqual(items[key], value)
1375 def testQueryDatasetsDeduplication(self):
1376 """Test that the findFirst option to queryDatasets selects datasets
1377 from collections in the order given".
1378 """
1379 registry = self.makeRegistry()
1380 self.loadData(registry, "base.yaml")
1381 self.loadData(registry, "datasets.yaml")
1382 self.assertCountEqual(
1383 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1384 [
1385 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1386 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1387 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1388 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1389 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1390 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1391 ],
1392 )
1393 self.assertCountEqual(
1394 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1395 [
1396 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1397 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1398 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1399 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1400 ],
1401 )
1402 self.assertCountEqual(
1403 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1404 [
1405 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1406 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1407 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1408 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1409 ],
1410 )
1412 def testQueryResults(self):
1413 """Test querying for data IDs and then manipulating the QueryResults
1414 object returned to perform other queries.
1415 """
1416 registry = self.makeRegistry()
1417 self.loadData(registry, "base.yaml")
1418 self.loadData(registry, "datasets.yaml")
1419 bias = registry.getDatasetType("bias")
1420 flat = registry.getDatasetType("flat")
1421 # Obtain expected results from methods other than those we're testing
1422 # here. That includes:
1423 # - the dimensions of the data IDs we want to query:
1424 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1425 # - the dimensions of some other data IDs we'll extract from that:
1426 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1427 # - the data IDs we expect to obtain from the first queries:
1428 expectedDataIds = DataCoordinateSet(
1429 {
1430 DataCoordinate.standardize(
1431 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1432 )
1433 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1434 },
1435 dimensions=expected_dimensions,
1436 hasFull=False,
1437 hasRecords=False,
1438 )
1439 # - the flat datasets we expect to find from those data IDs, in just
1440 # one collection (so deduplication is irrelevant):
1441 expectedFlats = [
1442 registry.findDataset(
1443 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1444 ),
1445 registry.findDataset(
1446 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1447 ),
1448 registry.findDataset(
1449 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1450 ),
1451 ]
1452 # - the data IDs we expect to extract from that:
1453 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1454 # - the bias datasets we expect to find from those data IDs, after we
1455 # subset-out the physical_filter dimension, both with duplicates:
1456 expectedAllBiases = [
1457 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1458 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1459 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1460 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1461 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1462 ]
1463 # - ...and without duplicates:
1464 expectedDeduplicatedBiases = [
1465 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1466 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1467 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1468 ]
1469 # Test against those expected results, using a "lazy" query for the
1470 # data IDs (which re-executes that query each time we use it to do
1471 # something new).
1472 dataIds = registry.queryDataIds(
1473 ["detector", "physical_filter"],
1474 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1475 instrument="Cam1",
1476 )
1477 self.assertEqual(dataIds.dimensions, expected_dimensions)
1478 self.assertEqual(dataIds.toSet(), expectedDataIds)
1479 self.assertCountEqual(
1480 list(
1481 dataIds.findDatasets(
1482 flat,
1483 collections=["imported_r"],
1484 )
1485 ),
1486 expectedFlats,
1487 )
1488 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1489 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1490 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1491 self.assertCountEqual(
1492 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1493 expectedAllBiases,
1494 )
1495 self.assertCountEqual(
1496 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1497 expectedDeduplicatedBiases,
1498 )
1500 # Searching for a dataset with dimensions we had projected away
1501 # restores those dimensions.
1502 self.assertCountEqual(
1503 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1504 expectedFlats,
1505 )
1507 # Use a component dataset type.
1508 self.assertCountEqual(
1509 [
1510 ref.makeComponentRef("image")
1511 for ref in subsetDataIds.findDatasets(
1512 bias,
1513 collections=["imported_r", "imported_g"],
1514 findFirst=False,
1515 )
1516 ],
1517 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1518 )
1520 # Use a named dataset type that does not exist and a dataset type
1521 # object that does not exist.
1522 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1524 # Test both string name and dataset type object.
1525 test_type: str | DatasetType
1526 for test_type, test_type_name in (
1527 (unknown_type, unknown_type.name),
1528 (unknown_type.name, unknown_type.name),
1529 ):
1530 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1531 list(
1532 subsetDataIds.findDatasets(
1533 test_type, collections=["imported_r", "imported_g"], findFirst=True
1534 )
1535 )
1537 # Materialize the bias dataset queries (only) by putting the results
1538 # into temporary tables, then repeat those tests.
1539 with subsetDataIds.findDatasets(
1540 bias, collections=["imported_r", "imported_g"], findFirst=False
1541 ).materialize() as biases:
1542 self.assertCountEqual(list(biases), expectedAllBiases)
1543 with subsetDataIds.findDatasets(
1544 bias, collections=["imported_r", "imported_g"], findFirst=True
1545 ).materialize() as biases:
1546 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1547 # Materialize the data ID subset query, but not the dataset queries.
1548 with subsetDataIds.materialize() as subsetDataIds:
1549 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1550 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1551 self.assertCountEqual(
1552 list(
1553 subsetDataIds.findDatasets(
1554 bias, collections=["imported_r", "imported_g"], findFirst=False
1555 )
1556 ),
1557 expectedAllBiases,
1558 )
1559 self.assertCountEqual(
1560 list(
1561 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1562 ),
1563 expectedDeduplicatedBiases,
1564 )
1565 # Materialize the dataset queries, too.
1566 with subsetDataIds.findDatasets(
1567 bias, collections=["imported_r", "imported_g"], findFirst=False
1568 ).materialize() as biases:
1569 self.assertCountEqual(list(biases), expectedAllBiases)
1570 with subsetDataIds.findDatasets(
1571 bias, collections=["imported_r", "imported_g"], findFirst=True
1572 ).materialize() as biases:
1573 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1574 # Materialize the original query, but none of the follow-up queries.
1575 with dataIds.materialize() as dataIds:
1576 self.assertEqual(dataIds.dimensions, expected_dimensions)
1577 self.assertEqual(dataIds.toSet(), expectedDataIds)
1578 self.assertCountEqual(
1579 list(
1580 dataIds.findDatasets(
1581 flat,
1582 collections=["imported_r"],
1583 )
1584 ),
1585 expectedFlats,
1586 )
1587 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1588 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1589 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1590 self.assertCountEqual(
1591 list(
1592 subsetDataIds.findDatasets(
1593 bias, collections=["imported_r", "imported_g"], findFirst=False
1594 )
1595 ),
1596 expectedAllBiases,
1597 )
1598 self.assertCountEqual(
1599 list(
1600 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1601 ),
1602 expectedDeduplicatedBiases,
1603 )
1604 # Materialize just the bias dataset queries.
1605 with subsetDataIds.findDatasets(
1606 bias, collections=["imported_r", "imported_g"], findFirst=False
1607 ).materialize() as biases:
1608 self.assertCountEqual(list(biases), expectedAllBiases)
1609 with subsetDataIds.findDatasets(
1610 bias, collections=["imported_r", "imported_g"], findFirst=True
1611 ).materialize() as biases:
1612 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1613 # Materialize the subset data ID query, but not the dataset
1614 # queries.
1615 with subsetDataIds.materialize() as subsetDataIds:
1616 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1617 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1618 self.assertCountEqual(
1619 list(
1620 subsetDataIds.findDatasets(
1621 bias, collections=["imported_r", "imported_g"], findFirst=False
1622 )
1623 ),
1624 expectedAllBiases,
1625 )
1626 self.assertCountEqual(
1627 list(
1628 subsetDataIds.findDatasets(
1629 bias, collections=["imported_r", "imported_g"], findFirst=True
1630 )
1631 ),
1632 expectedDeduplicatedBiases,
1633 )
1634 # Materialize the bias dataset queries, too, so now we're
1635 # materializing every single step.
1636 with subsetDataIds.findDatasets(
1637 bias, collections=["imported_r", "imported_g"], findFirst=False
1638 ).materialize() as biases:
1639 self.assertCountEqual(list(biases), expectedAllBiases)
1640 with subsetDataIds.findDatasets(
1641 bias, collections=["imported_r", "imported_g"], findFirst=True
1642 ).materialize() as biases:
1643 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1645 def testStorageClassPropagation(self):
1646 """Test that queries for datasets respect the storage class passed in
1647 as part of a full dataset type.
1648 """
1649 registry = self.makeRegistry()
1650 self.loadData(registry, "base.yaml")
1651 dataset_type_in_registry = DatasetType(
1652 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1653 )
1654 registry.registerDatasetType(dataset_type_in_registry)
1655 run = "run1"
1656 registry.registerRun(run)
1657 (inserted_ref,) = registry.insertDatasets(
1658 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1659 )
1660 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1661 query_dataset_type = DatasetType(
1662 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1663 )
1664 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1665 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1666 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1667 (query_datasets_ref,) = query_datasets_result
1668 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1669 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1670 query_dataset_type, collections=[run]
1671 )
1672 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1673 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1674 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1675 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1676 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1677 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1678 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1680 def testEmptyDimensionsQueries(self):
1681 """Test Query and QueryResults objects in the case where there are no
1682 dimensions.
1683 """
1684 # Set up test data: one dataset type, two runs, one dataset in each.
1685 registry = self.makeRegistry()
1686 self.loadData(registry, "base.yaml")
1687 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1688 registry.registerDatasetType(schema)
1689 dataId = DataCoordinate.make_empty(registry.dimensions)
1690 run1 = "run1"
1691 run2 = "run2"
1692 registry.registerRun(run1)
1693 registry.registerRun(run2)
1694 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1695 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1696 # Query directly for both of the datasets, and each one, one at a time.
1697 self.checkQueryResults(
1698 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1699 )
1700 self.checkQueryResults(
1701 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1702 [dataset1],
1703 )
1704 self.checkQueryResults(
1705 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1706 [dataset2],
1707 )
1708 # Query for data IDs with no dimensions.
1709 dataIds = registry.queryDataIds([])
1710 self.checkQueryResults(dataIds, [dataId])
1711 # Use queried data IDs to find the datasets.
1712 self.checkQueryResults(
1713 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1714 [dataset1, dataset2],
1715 )
1716 self.checkQueryResults(
1717 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1718 [dataset1],
1719 )
1720 self.checkQueryResults(
1721 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1722 [dataset2],
1723 )
1724 # Now materialize the data ID query results and repeat those tests.
1725 with dataIds.materialize() as dataIds:
1726 self.checkQueryResults(dataIds, [dataId])
1727 self.checkQueryResults(
1728 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1729 [dataset1],
1730 )
1731 self.checkQueryResults(
1732 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1733 [dataset2],
1734 )
1735 # Query for non-empty data IDs, then subset that to get the empty one.
1736 # Repeat the above tests starting from that.
1737 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1738 self.checkQueryResults(dataIds, [dataId])
1739 self.checkQueryResults(
1740 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1741 [dataset1, dataset2],
1742 )
1743 self.checkQueryResults(
1744 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1745 [dataset1],
1746 )
1747 self.checkQueryResults(
1748 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1749 [dataset2],
1750 )
1751 with dataIds.materialize() as dataIds:
1752 self.checkQueryResults(dataIds, [dataId])
1753 self.checkQueryResults(
1754 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1755 [dataset1, dataset2],
1756 )
1757 self.checkQueryResults(
1758 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1759 [dataset1],
1760 )
1761 self.checkQueryResults(
1762 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1763 [dataset2],
1764 )
1765 # Query for non-empty data IDs, then materialize, then subset to get
1766 # the empty one. Repeat again.
1767 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1768 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1769 self.checkQueryResults(dataIds, [dataId])
1770 self.checkQueryResults(
1771 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1772 [dataset1, dataset2],
1773 )
1774 self.checkQueryResults(
1775 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1776 [dataset1],
1777 )
1778 self.checkQueryResults(
1779 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1780 [dataset2],
1781 )
1782 with dataIds.materialize() as dataIds:
1783 self.checkQueryResults(dataIds, [dataId])
1784 self.checkQueryResults(
1785 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1786 [dataset1, dataset2],
1787 )
1788 self.checkQueryResults(
1789 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1790 [dataset1],
1791 )
1792 self.checkQueryResults(
1793 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1794 [dataset2],
1795 )
1796 # Query for non-empty data IDs with a constraint on an empty-data-ID
1797 # dataset that exists.
1798 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1799 self.checkQueryResults(
1800 dataIds.subset(unique=True),
1801 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1802 )
1803 # Again query for non-empty data IDs with a constraint on empty-data-ID
1804 # datasets, but when the datasets don't exist. We delete the existing
1805 # dataset and query just that collection rather than creating a new
1806 # empty collection because this is a bit less likely for our build-time
1807 # logic to shortcut-out (via the collection summaries), and such a
1808 # shortcut would make this test a bit more trivial than we'd like.
1809 registry.removeDatasets([dataset2])
1810 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1811 self.checkQueryResults(dataIds, [])
1813 def testDimensionDataModifications(self):
1814 """Test that modifying dimension records via:
1815 syncDimensionData(..., update=True) and
1816 insertDimensionData(..., replace=True) works as expected, even in the
1817 presence of datasets using those dimensions and spatial overlap
1818 relationships.
1819 """
1821 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1822 """Unpack a sphgeom.RangeSet into the integers it contains."""
1823 for begin, end in ranges:
1824 yield from range(begin, end)
1826 def range_set_hull(
1827 ranges: lsst.sphgeom.RangeSet,
1828 pixelization: lsst.sphgeom.HtmPixelization,
1829 ) -> lsst.sphgeom.ConvexPolygon:
1830 """Create a ConvexPolygon hull of the region defined by a set of
1831 HTM pixelization index ranges.
1832 """
1833 points = []
1834 for index in unpack_range_set(ranges):
1835 points.extend(pixelization.triangle(index).getVertices())
1836 return lsst.sphgeom.ConvexPolygon(points)
1838 # Use HTM to set up an initial parent region (one arbitrary trixel)
1839 # and four child regions (the trixels within the parent at the next
1840 # level. We'll use the parent as a tract/visit region and the children
1841 # as its patch/visit_detector regions.
1842 registry = self.makeRegistry()
1843 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1844 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1845 index = 12288
1846 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1847 assert htm6.universe().contains(child_ranges_small)
1848 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1849 parent_region_small = lsst.sphgeom.ConvexPolygon(
1850 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1851 )
1852 assert all(parent_region_small.contains(c) for c in child_regions_small)
1853 # Make a larger version of each child region, defined to be the set of
1854 # htm6 trixels that overlap the original's bounding circle. Make a new
1855 # parent that's the convex hull of the new children.
1856 child_regions_large = [
1857 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1858 ]
1859 assert all(
1860 large.contains(small)
1861 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1862 )
1863 parent_region_large = lsst.sphgeom.ConvexPolygon(
1864 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1865 )
1866 assert all(parent_region_large.contains(c) for c in child_regions_large)
1867 assert parent_region_large.contains(parent_region_small)
1868 assert not parent_region_small.contains(parent_region_large)
1869 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1870 # Find some commonSkyPix indices that overlap the large regions but not
1871 # overlap the small regions. We use commonSkyPix here to make sure the
1872 # real tests later involve what's in the database, not just post-query
1873 # filtering of regions.
1874 child_difference_indices = []
1875 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1876 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1877 assert difference, "if this is empty, we can't test anything useful with these regions"
1878 assert all(
1879 not commonSkyPix.triangle(d).isDisjointFrom(large)
1880 and commonSkyPix.triangle(d).isDisjointFrom(small)
1881 for d in difference
1882 )
1883 child_difference_indices.append(difference)
1884 parent_difference_indices = list(
1885 unpack_range_set(
1886 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1887 )
1888 )
1889 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1890 assert all(
1891 (
1892 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1893 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1894 )
1895 for d in parent_difference_indices
1896 )
1897 # Now that we've finally got those regions, we'll insert the large ones
1898 # as tract/patch dimension records.
1899 skymap_name = "testing_v1"
1900 registry.insertDimensionData(
1901 "skymap",
1902 {
1903 "name": skymap_name,
1904 "hash": bytes([42]),
1905 "tract_max": 1,
1906 "patch_nx_max": 2,
1907 "patch_ny_max": 2,
1908 },
1909 )
1910 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1911 registry.insertDimensionData(
1912 "patch",
1913 *[
1914 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1915 for n, c in enumerate(child_regions_large)
1916 ],
1917 )
1918 # Add at dataset that uses these dimensions to make sure that modifying
1919 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1920 # implement insert with replace=True as delete-then-insert).
1921 dataset_type = DatasetType(
1922 "coadd",
1923 dimensions=["tract", "patch"],
1924 universe=registry.dimensions,
1925 storageClass="Exposure",
1926 )
1927 registry.registerDatasetType(dataset_type)
1928 registry.registerCollection("the_run", CollectionType.RUN)
1929 registry.insertDatasets(
1930 dataset_type,
1931 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1932 run="the_run",
1933 )
1934 # Query for tracts and patches that overlap some "difference" htm9
1935 # pixels; there should be overlaps, because the database has
1936 # the "large" suite of regions.
1937 self.assertEqual(
1938 {0},
1939 {
1940 data_id["tract"]
1941 for data_id in registry.queryDataIds(
1942 ["tract"],
1943 skymap=skymap_name,
1944 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1945 )
1946 },
1947 )
1948 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1949 self.assertIn(
1950 patch_id,
1951 {
1952 data_id["patch"]
1953 for data_id in registry.queryDataIds(
1954 ["patch"],
1955 skymap=skymap_name,
1956 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1957 )
1958 },
1959 )
1960 # Use sync to update the tract region and insert to update the regions
1961 # of the patches, to the "small" suite.
1962 updated = registry.syncDimensionData(
1963 "tract",
1964 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1965 update=True,
1966 )
1967 self.assertEqual(updated, {"region": parent_region_large})
1968 registry.insertDimensionData(
1969 "patch",
1970 *[
1971 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1972 for n, c in enumerate(child_regions_small)
1973 ],
1974 replace=True,
1975 )
1976 # Query again; there now should be no such overlaps, because the
1977 # database has the "small" suite of regions.
1978 self.assertFalse(
1979 set(
1980 registry.queryDataIds(
1981 ["tract"],
1982 skymap=skymap_name,
1983 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1984 )
1985 )
1986 )
1987 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1988 self.assertNotIn(
1989 patch_id,
1990 {
1991 data_id["patch"]
1992 for data_id in registry.queryDataIds(
1993 ["patch"],
1994 skymap=skymap_name,
1995 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1996 )
1997 },
1998 )
1999 # Update back to the large regions and query one more time.
2000 updated = registry.syncDimensionData(
2001 "tract",
2002 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2003 update=True,
2004 )
2005 self.assertEqual(updated, {"region": parent_region_small})
2006 registry.insertDimensionData(
2007 "patch",
2008 *[
2009 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2010 for n, c in enumerate(child_regions_large)
2011 ],
2012 replace=True,
2013 )
2014 self.assertEqual(
2015 {0},
2016 {
2017 data_id["tract"]
2018 for data_id in registry.queryDataIds(
2019 ["tract"],
2020 skymap=skymap_name,
2021 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2022 )
2023 },
2024 )
2025 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2026 self.assertIn(
2027 patch_id,
2028 {
2029 data_id["patch"]
2030 for data_id in registry.queryDataIds(
2031 ["patch"],
2032 skymap=skymap_name,
2033 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2034 )
2035 },
2036 )
2038 def testCalibrationCollections(self):
2039 """Test operations on `~CollectionType.CALIBRATION` collections,
2040 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
2041 `SqlRegistry.findDataset`, and
2042 `DataCoordinateQueryResults.findRelatedDatasets`.
2043 """
2044 # Setup - make a Registry, fill it with some datasets in
2045 # non-calibration collections.
2046 registry = self.makeRegistry()
2047 self.loadData(registry, "base.yaml")
2048 self.loadData(registry, "datasets.yaml")
2049 # Set up some timestamps.
2050 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2051 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2052 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2053 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2054 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2055 allTimespans = [
2056 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2057 ]
2058 # Insert some exposure records with timespans between each sequential
2059 # pair of those.
2060 registry.insertDimensionData(
2061 "exposure",
2062 {
2063 "instrument": "Cam1",
2064 "id": 0,
2065 "obs_id": "zero",
2066 "physical_filter": "Cam1-G",
2067 "timespan": Timespan(t1, t2),
2068 },
2069 {
2070 "instrument": "Cam1",
2071 "id": 1,
2072 "obs_id": "one",
2073 "physical_filter": "Cam1-G",
2074 "timespan": Timespan(t2, t3),
2075 },
2076 {
2077 "instrument": "Cam1",
2078 "id": 2,
2079 "obs_id": "two",
2080 "physical_filter": "Cam1-G",
2081 "timespan": Timespan(t3, t4),
2082 },
2083 {
2084 "instrument": "Cam1",
2085 "id": 3,
2086 "obs_id": "three",
2087 "physical_filter": "Cam1-G",
2088 "timespan": Timespan(t4, t5),
2089 },
2090 )
2091 # Get references to some datasets.
2092 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2093 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2094 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2095 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2096 # Register the main calibration collection we'll be working with.
2097 collection = "Cam1/calibs/default"
2098 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2099 # Cannot associate into a calibration collection (no timespan).
2100 with self.assertRaises(CollectionTypeError):
2101 registry.associate(collection, [bias2a])
2102 # Certify 2a dataset with [t2, t4) validity.
2103 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2104 # Test that we can query for this dataset via the new collection, both
2105 # on its own and with a RUN collection.
2106 self.assertEqual(
2107 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2108 {bias2a},
2109 )
2110 self.assertEqual(
2111 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2112 {
2113 bias2a,
2114 bias2b,
2115 bias3b,
2116 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2117 },
2118 )
2119 self.assertEqual(
2120 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2121 {registry.expandDataId(instrument="Cam1", detector=2)},
2122 )
2123 self.assertEqual(
2124 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2125 {
2126 registry.expandDataId(instrument="Cam1", detector=2),
2127 registry.expandDataId(instrument="Cam1", detector=3),
2128 registry.expandDataId(instrument="Cam1", detector=4),
2129 },
2130 )
2131 self.assertEqual(
2132 set(
2133 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2134 "bias", findFirst=True, collections=[collection]
2135 )
2136 ),
2137 {
2138 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2139 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2140 },
2141 )
2142 self.assertEqual(
2143 set(
2144 registry.queryDataIds(
2145 ["exposure", "detector"], instrument="Cam1", detector=2
2146 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2147 ),
2148 {
2149 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2150 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2151 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2152 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2153 },
2154 )
2156 # We should not be able to certify 2b with anything overlapping that
2157 # window.
2158 with self.assertRaises(ConflictingDefinitionError):
2159 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2160 with self.assertRaises(ConflictingDefinitionError):
2161 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2162 with self.assertRaises(ConflictingDefinitionError):
2163 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2164 with self.assertRaises(ConflictingDefinitionError):
2165 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2166 with self.assertRaises(ConflictingDefinitionError):
2167 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2168 with self.assertRaises(ConflictingDefinitionError):
2169 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2170 with self.assertRaises(ConflictingDefinitionError):
2171 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2172 with self.assertRaises(ConflictingDefinitionError):
2173 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2174 # We should be able to certify 3a with a range overlapping that window,
2175 # because it's for a different detector.
2176 # We'll certify 3a over [t1, t3).
2177 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2178 # Now we'll certify 2b and 3b together over [t4, ∞).
2179 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2181 # Fetch all associations and check that they are what we expect.
2182 self.assertCountEqual(
2183 list(
2184 registry.queryDatasetAssociations(
2185 "bias",
2186 collections=[collection, "imported_g", "imported_r"],
2187 )
2188 ),
2189 [
2190 DatasetAssociation(
2191 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2192 collection="imported_g",
2193 timespan=None,
2194 ),
2195 DatasetAssociation(
2196 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2197 collection="imported_r",
2198 timespan=None,
2199 ),
2200 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2201 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2202 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2203 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2204 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2205 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2206 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2207 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2208 ],
2209 )
2211 class Ambiguous:
2212 """Tag class to denote lookups that should be ambiguous."""
2214 pass
2216 def assertLookup(
2217 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2218 ) -> None:
2219 """Local function that asserts that a bias lookup returns the given
2220 expected result.
2221 """
2222 if expected is Ambiguous:
2223 with self.assertRaises((DatasetTypeError, LookupError)):
2224 registry.findDataset(
2225 "bias",
2226 collections=collection,
2227 instrument="Cam1",
2228 detector=detector,
2229 timespan=timespan,
2230 )
2231 else:
2232 self.assertEqual(
2233 expected,
2234 registry.findDataset(
2235 "bias",
2236 collections=collection,
2237 instrument="Cam1",
2238 detector=detector,
2239 timespan=timespan,
2240 ),
2241 )
2243 # Systematically test lookups against expected results.
2244 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2245 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2246 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2247 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2248 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2249 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2250 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2251 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2252 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2253 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2254 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2255 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2256 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2257 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2258 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2259 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2260 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2261 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2262 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2263 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2264 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2265 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2266 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2267 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2268 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2269 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2270 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2271 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2272 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2273 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2274 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2275 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2276 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2277 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2278 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2279 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2280 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2281 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2282 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2283 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2284 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2285 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2287 # Test lookups via temporal joins to exposures.
2288 self.assertEqual(
2289 set(
2290 registry.queryDataIds(
2291 ["exposure", "detector"], instrument="Cam1", detector=2
2292 ).findRelatedDatasets("bias", collections=[collection])
2293 ),
2294 {
2295 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2296 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2297 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2298 },
2299 )
2300 self.assertEqual(
2301 set(
2302 registry.queryDataIds(
2303 ["exposure", "detector"], instrument="Cam1", detector=3
2304 ).findRelatedDatasets("bias", collections=[collection])
2305 ),
2306 {
2307 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2308 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2309 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2310 },
2311 )
2312 self.assertEqual(
2313 set(
2314 registry.queryDataIds(
2315 ["exposure", "detector"], instrument="Cam1", detector=2
2316 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2317 ),
2318 {
2319 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2320 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2321 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2322 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2323 },
2324 )
2325 self.assertEqual(
2326 set(
2327 registry.queryDataIds(
2328 ["exposure", "detector"], instrument="Cam1", detector=3
2329 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2330 ),
2331 {
2332 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2333 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2334 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2335 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2336 },
2337 )
2339 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2340 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2341 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2342 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2343 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2344 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2345 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2346 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2347 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2348 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2349 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2350 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2351 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2352 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2353 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2354 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2355 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2356 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2357 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2358 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2359 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2360 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2361 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2362 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2363 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2364 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2365 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2366 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2367 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2368 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2369 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2370 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2371 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2372 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2373 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2374 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2375 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2376 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2377 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2378 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2379 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2380 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2381 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2382 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2383 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2384 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2386 # Decertify everything, this time with explicit data IDs, then check
2387 # that no lookups succeed.
2388 registry.decertify(
2389 collection,
2390 "bias",
2391 Timespan(None, None),
2392 dataIds=[
2393 dict(instrument="Cam1", detector=2),
2394 dict(instrument="Cam1", detector=3),
2395 ],
2396 )
2397 for detector in (2, 3):
2398 for timespan in allTimespans:
2399 assertLookup(detector=detector, timespan=timespan, expected=None)
2400 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2401 # those.
2402 registry.certify(
2403 collection,
2404 [bias2a, bias3a],
2405 Timespan(None, None),
2406 )
2407 for timespan in allTimespans:
2408 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2409 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2410 # Decertify just bias2 over [t2, t4).
2411 # This should split a single certification row into two (and leave the
2412 # other existing row, for bias3a, alone).
2413 registry.decertify(
2414 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2415 )
2416 for timespan in allTimespans:
2417 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2418 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2419 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2420 if overlapsBefore and overlapsAfter:
2421 expected = Ambiguous
2422 elif overlapsBefore or overlapsAfter:
2423 expected = bias2a
2424 else:
2425 expected = None
2426 assertLookup(detector=2, timespan=timespan, expected=expected)
2428 def testSkipCalibs(self):
2429 """Test how queries handle skipping of calibration collections."""
2430 registry = self.makeRegistry()
2431 self.loadData(registry, "base.yaml")
2432 self.loadData(registry, "datasets.yaml")
2434 coll_calib = "Cam1/calibs/default"
2435 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2437 # Add all biases to the calibration collection.
2438 # Without this, the logic that prunes dataset subqueries based on
2439 # datasetType-collection summary information will fire before the logic
2440 # we want to test below. This is a good thing (it avoids the dreaded
2441 # NotImplementedError a bit more often) everywhere but here.
2442 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2444 coll_list = [coll_calib, "imported_g", "imported_r"]
2445 chain = "Cam1/chain"
2446 registry.registerCollection(chain, type=CollectionType.CHAINED)
2447 registry.setCollectionChain(chain, coll_list)
2449 # explicit list will raise if findFirst=True or there are temporal
2450 # dimensions
2451 with self.assertRaises(NotImplementedError):
2452 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2453 with self.assertRaises(NotImplementedError):
2454 registry.queryDataIds(
2455 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2456 ).count()
2458 # chain will skip
2459 datasets = list(registry.queryDatasets("bias", collections=chain))
2460 self.assertGreater(len(datasets), 0)
2462 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2463 self.assertGreater(len(dataIds), 0)
2465 # glob will skip too
2466 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2467 self.assertGreater(len(datasets), 0)
2469 # regular expression will skip too
2470 pattern = re.compile(".*")
2471 datasets = list(registry.queryDatasets("bias", collections=pattern))
2472 self.assertGreater(len(datasets), 0)
2474 # ellipsis should work as usual
2475 datasets = list(registry.queryDatasets("bias", collections=...))
2476 self.assertGreater(len(datasets), 0)
2478 # few tests with findFirst
2479 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2480 self.assertGreater(len(datasets), 0)
2482 def testIngestTimeQuery(self):
2483 registry = self.makeRegistry()
2484 self.loadData(registry, "base.yaml")
2485 dt0 = datetime.datetime.now(datetime.UTC)
2486 self.loadData(registry, "datasets.yaml")
2487 dt1 = datetime.datetime.now(datetime.UTC)
2489 datasets = list(registry.queryDatasets(..., collections=...))
2490 len0 = len(datasets)
2491 self.assertGreater(len0, 0)
2493 where = "ingest_date > T'2000-01-01'"
2494 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2495 len1 = len(datasets)
2496 self.assertEqual(len0, len1)
2498 # no one will ever use this piece of software in 30 years
2499 where = "ingest_date > T'2050-01-01'"
2500 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2501 len2 = len(datasets)
2502 self.assertEqual(len2, 0)
2504 # Check more exact timing to make sure there is no 37 seconds offset
2505 # (after fixing DM-30124). SQLite time precision is 1 second, make
2506 # sure that we don't test with higher precision.
2507 tests = [
2508 # format: (timestamp, operator, expected_len)
2509 (dt0 - timedelta(seconds=1), ">", len0),
2510 (dt0 - timedelta(seconds=1), "<", 0),
2511 (dt1 + timedelta(seconds=1), "<", len0),
2512 (dt1 + timedelta(seconds=1), ">", 0),
2513 ]
2514 for dt, op, expect_len in tests:
2515 dt_str = dt.isoformat(sep=" ")
2517 where = f"ingest_date {op} T'{dt_str}'"
2518 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2519 self.assertEqual(len(datasets), expect_len)
2521 # same with bind using datetime or astropy Time
2522 where = f"ingest_date {op} ingest_time"
2523 datasets = list(
2524 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2525 )
2526 self.assertEqual(len(datasets), expect_len)
2528 dt_astropy = astropy.time.Time(dt, format="datetime")
2529 datasets = list(
2530 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2531 )
2532 self.assertEqual(len(datasets), expect_len)
2534 def testTimespanQueries(self):
2535 """Test query expressions involving timespans."""
2536 registry = self.makeRegistry()
2537 self.loadData(registry, "hsc-rc2-subset.yaml")
2538 # All exposures in the database; mapping from ID to timespan.
2539 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2540 # Just those IDs, sorted (which is also temporal sorting, because HSC
2541 # exposure IDs are monotonically increasing).
2542 ids = sorted(visits.keys())
2543 self.assertGreater(len(ids), 20)
2544 # Pick some quasi-random indexes into `ids` to play with.
2545 i1 = int(len(ids) * 0.1)
2546 i2 = int(len(ids) * 0.3)
2547 i3 = int(len(ids) * 0.6)
2548 i4 = int(len(ids) * 0.8)
2549 # Extract some times from those: just before the beginning of i1 (which
2550 # should be after the end of the exposure before), exactly the
2551 # beginning of i2, just after the beginning of i3 (and before its end),
2552 # and the exact end of i4.
2553 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2554 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2555 t2 = visits[ids[i2]].begin
2556 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2557 self.assertLess(t3, visits[ids[i3]].end)
2558 t4 = visits[ids[i4]].end
2559 # Make sure those are actually in order.
2560 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2562 bind = {
2563 "t1": t1,
2564 "t2": t2,
2565 "t3": t3,
2566 "t4": t4,
2567 "ts23": Timespan(t2, t3),
2568 }
2570 def query(where):
2571 """Return results as a sorted, deduplicated list of visit IDs."""
2572 return sorted(
2573 {
2574 dataId["visit"]
2575 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2576 }
2577 )
2579 # Try a bunch of timespan queries, mixing up the bounds themselves,
2580 # where they appear in the expression, and how we get the timespan into
2581 # the expression.
2583 # t1 is before the start of i1, so this should not include i1.
2584 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2585 # t2 is exactly at the start of i2, but ends are exclusive, so these
2586 # should not include i2.
2587 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2588 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2589 # t3 is in the middle of i3, so this should include i3.
2590 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2591 # This one should not include t3 by the same reasoning.
2592 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2593 # t4 is exactly at the end of i4, so this should include i4.
2594 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2595 # i4's upper bound of t4 is exclusive so this should not include t4.
2596 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2598 # Now some timespan vs. time scalar queries.
2599 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2600 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2601 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2602 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2603 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2604 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2606 # Empty timespans should not overlap anything.
2607 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2609 def testCollectionSummaries(self):
2610 """Test recording and retrieval of collection summaries."""
2611 self.maxDiff = None
2612 registry = self.makeRegistry()
2613 # Importing datasets from yaml should go through the code path where
2614 # we update collection summaries as we insert datasets.
2615 self.loadData(registry, "base.yaml")
2616 self.loadData(registry, "datasets.yaml")
2617 flat = registry.getDatasetType("flat")
2618 expected1 = CollectionSummary()
2619 expected1.dataset_types.add(registry.getDatasetType("bias"))
2620 expected1.add_data_ids(
2621 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2622 )
2623 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2624 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2625 # Create a chained collection with both of the imported runs; the
2626 # summary should be the same, because it's a union with itself.
2627 chain = "chain"
2628 registry.registerCollection(chain, CollectionType.CHAINED)
2629 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2630 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2631 # Associate flats only into a tagged collection and a calibration
2632 # collection to check summaries of those.
2633 tag = "tag"
2634 registry.registerCollection(tag, CollectionType.TAGGED)
2635 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2636 calibs = "calibs"
2637 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2638 registry.certify(
2639 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2640 )
2641 expected2 = expected1.copy()
2642 expected2.dataset_types.discard("bias")
2643 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2644 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2645 # Explicitly calling SqlRegistry.refresh() should load those same
2646 # summaries, via a totally different code path.
2647 registry.refresh()
2648 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2649 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2650 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2651 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2653 def testBindInQueryDatasets(self):
2654 """Test that the bind parameter is correctly forwarded in
2655 queryDatasets recursion.
2656 """
2657 registry = self.makeRegistry()
2658 # Importing datasets from yaml should go through the code path where
2659 # we update collection summaries as we insert datasets.
2660 self.loadData(registry, "base.yaml")
2661 self.loadData(registry, "datasets.yaml")
2662 self.assertEqual(
2663 set(registry.queryDatasets("flat", band="r", collections=...)),
2664 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2665 )
2667 def testQueryIntRangeExpressions(self):
2668 """Test integer range expressions in ``where`` arguments.
2670 Note that our expressions use inclusive stop values, unlike Python's.
2671 """
2672 registry = self.makeRegistry()
2673 self.loadData(registry, "base.yaml")
2674 self.assertEqual(
2675 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2676 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2677 )
2678 self.assertEqual(
2679 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2680 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2681 )
2682 self.assertEqual(
2683 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2684 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2685 )
2687 def testQueryResultSummaries(self):
2688 """Test summary methods like `count`, `any`, and `explain_no_results`
2689 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2690 """
2691 registry = self.makeRegistry()
2692 self.loadData(registry, "base.yaml")
2693 self.loadData(registry, "datasets.yaml")
2694 self.loadData(registry, "spatial.yaml")
2695 # Default test dataset has two collections, each with both flats and
2696 # biases. Add a new collection with only biases.
2697 registry.registerCollection("biases", CollectionType.TAGGED)
2698 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2699 # First query yields two results, and involves no postprocessing.
2700 query1 = registry.queryDataIds(["physical_filter"], band="r")
2701 self.assertTrue(query1.any(execute=False, exact=False))
2702 self.assertTrue(query1.any(execute=True, exact=False))
2703 self.assertTrue(query1.any(execute=True, exact=True))
2704 self.assertEqual(query1.count(exact=False), 2)
2705 self.assertEqual(query1.count(exact=True), 2)
2706 self.assertFalse(list(query1.explain_no_results()))
2707 # Second query should yield no results, which we should see when
2708 # we attempt to expand the data ID.
2709 query2 = registry.queryDataIds(["physical_filter"], band="h")
2710 # There's no execute=False, exact=Fals test here because the behavior
2711 # not something we want to guarantee in this case (and exact=False
2712 # says either answer is legal).
2713 self.assertFalse(query2.any(execute=True, exact=False))
2714 self.assertFalse(query2.any(execute=True, exact=True))
2715 self.assertEqual(query2.count(exact=False), 0)
2716 self.assertEqual(query2.count(exact=True), 0)
2717 self.assertTrue(list(query2.explain_no_results()))
2718 # These queries yield no results due to various problems that can be
2719 # spotted prior to execution, yielding helpful diagnostics.
2720 base_query = registry.queryDataIds(["detector", "physical_filter"])
2721 queries_and_snippets = [
2722 (
2723 # Dataset type name doesn't match any existing dataset types.
2724 registry.queryDatasets("nonexistent", collections=...),
2725 ["nonexistent"],
2726 ),
2727 (
2728 # Dataset type object isn't registered.
2729 registry.queryDatasets(
2730 DatasetType(
2731 "nonexistent",
2732 dimensions=["instrument"],
2733 universe=registry.dimensions,
2734 storageClass="Image",
2735 ),
2736 collections=...,
2737 ),
2738 ["nonexistent"],
2739 ),
2740 (
2741 # No datasets of this type in this collection.
2742 registry.queryDatasets("flat", collections=["biases"]),
2743 ["flat", "biases"],
2744 ),
2745 (
2746 # No datasets of this type in this collection.
2747 base_query.findDatasets("flat", collections=["biases"]),
2748 ["flat", "biases"],
2749 ),
2750 (
2751 # No collections matching at all.
2752 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2753 ["potato"],
2754 ),
2755 ]
2756 # The behavior of these additional queries is slated to change in the
2757 # future, so we also check for deprecation warnings.
2758 with self.assertWarns(FutureWarning):
2759 queries_and_snippets.append(
2760 (
2761 # Dataset type name doesn't match any existing dataset
2762 # types.
2763 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2764 ["nonexistent"],
2765 )
2766 )
2767 with self.assertWarns(FutureWarning):
2768 queries_and_snippets.append(
2769 (
2770 # Dataset type name doesn't match any existing dataset
2771 # types.
2772 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2773 ["nonexistent"],
2774 )
2775 )
2776 for query, snippets in queries_and_snippets:
2777 self.assertFalse(query.any(execute=False, exact=False))
2778 self.assertFalse(query.any(execute=True, exact=False))
2779 self.assertFalse(query.any(execute=True, exact=True))
2780 self.assertEqual(query.count(exact=False), 0)
2781 self.assertEqual(query.count(exact=True), 0)
2782 messages = list(query.explain_no_results())
2783 self.assertTrue(messages)
2784 # Want all expected snippets to appear in at least one message.
2785 self.assertTrue(
2786 any(
2787 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2788 ),
2789 messages,
2790 )
2792 # This query does yield results, but should also emit a warning because
2793 # dataset type patterns to queryDataIds is deprecated; just look for
2794 # the warning.
2795 with self.assertWarns(FutureWarning):
2796 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2798 # These queries yield no results due to problems that can be identified
2799 # by cheap follow-up queries, yielding helpful diagnostics.
2800 for query, snippets in [
2801 (
2802 # No records for one of the involved dimensions.
2803 registry.queryDataIds(["subfilter"]),
2804 ["no rows", "subfilter"],
2805 ),
2806 (
2807 # No records for one of the involved dimensions.
2808 registry.queryDimensionRecords("subfilter"),
2809 ["no rows", "subfilter"],
2810 ),
2811 ]:
2812 self.assertFalse(query.any(execute=True, exact=False))
2813 self.assertFalse(query.any(execute=True, exact=True))
2814 self.assertEqual(query.count(exact=True), 0)
2815 messages = list(query.explain_no_results())
2816 self.assertTrue(messages)
2817 # Want all expected snippets to appear in at least one message.
2818 self.assertTrue(
2819 any(
2820 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2821 ),
2822 messages,
2823 )
2825 # This query yields four overlaps in the database, but one is filtered
2826 # out in postprocessing. The count queries aren't accurate because
2827 # they don't account for duplication that happens due to an internal
2828 # join against commonSkyPix.
2829 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2830 self.assertEqual(
2831 {
2832 DataCoordinate.standardize(
2833 instrument="Cam1",
2834 skymap="SkyMap1",
2835 visit=v,
2836 tract=t,
2837 universe=registry.dimensions,
2838 )
2839 for v, t in [(1, 0), (2, 0), (2, 1)]
2840 },
2841 set(query3),
2842 )
2843 self.assertTrue(query3.any(execute=False, exact=False))
2844 self.assertTrue(query3.any(execute=True, exact=False))
2845 self.assertTrue(query3.any(execute=True, exact=True))
2846 self.assertGreaterEqual(query3.count(exact=False), 4)
2847 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2848 self.assertFalse(list(query3.explain_no_results()))
2849 # This query yields overlaps in the database, but all are filtered
2850 # out in postprocessing. The count queries again aren't very useful.
2851 # We have to use `where=` here to avoid an optimization that
2852 # (currently) skips the spatial postprocess-filtering because it
2853 # recognizes that no spatial join is necessary. That's not ideal, but
2854 # fixing it is out of scope for this ticket.
2855 query4 = registry.queryDataIds(
2856 ["visit", "tract"],
2857 instrument="Cam1",
2858 skymap="SkyMap1",
2859 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2860 )
2861 self.assertFalse(set(query4))
2862 self.assertTrue(query4.any(execute=False, exact=False))
2863 self.assertTrue(query4.any(execute=True, exact=False))
2864 self.assertFalse(query4.any(execute=True, exact=True))
2865 self.assertGreaterEqual(query4.count(exact=False), 1)
2866 self.assertEqual(query4.count(exact=True, discard=True), 0)
2867 messages = query4.explain_no_results()
2868 self.assertTrue(messages)
2869 self.assertTrue(any("overlap" in message for message in messages))
2870 # This query should yield results from one dataset type but not the
2871 # other, which is not registered.
2872 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2873 self.assertTrue(set(query5))
2874 self.assertTrue(query5.any(execute=False, exact=False))
2875 self.assertTrue(query5.any(execute=True, exact=False))
2876 self.assertTrue(query5.any(execute=True, exact=True))
2877 self.assertGreaterEqual(query5.count(exact=False), 1)
2878 self.assertGreaterEqual(query5.count(exact=True), 1)
2879 self.assertFalse(list(query5.explain_no_results()))
2880 # This query applies a selection that yields no results, fully in the
2881 # database. Explaining why it fails involves traversing the relation
2882 # tree and running a LIMIT 1 query at each level that has the potential
2883 # to remove rows.
2884 query6 = registry.queryDimensionRecords(
2885 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2886 )
2887 self.assertEqual(query6.count(exact=True), 0)
2888 messages = query6.explain_no_results()
2889 self.assertTrue(messages)
2890 self.assertTrue(any("no-purpose" in message for message in messages))
2892 def testQueryDataIdsExpressionError(self):
2893 """Test error checking of 'where' expressions in queryDataIds."""
2894 registry = self.makeRegistry()
2895 self.loadData(registry, "base.yaml")
2896 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2897 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2898 registry.queryDataIds(["detector"], where="foo.bar = 12")
2899 with self.assertRaisesRegex(
2900 LookupError, "Dimension element name cannot be inferred in this context."
2901 ):
2902 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2904 def testQueryDataIdsOrderBy(self):
2905 """Test order_by and limit on result returned by queryDataIds()."""
2906 registry = self.makeRegistry()
2907 self.loadData(registry, "base.yaml")
2908 self.loadData(registry, "datasets.yaml")
2909 self.loadData(registry, "spatial.yaml")
2911 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2912 return registry.queryDataIds(
2913 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2914 )
2916 Test = namedtuple(
2917 "testQueryDataIdsOrderByTest",
2918 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2919 defaults=(None, None, None),
2920 )
2922 test_data = (
2923 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2924 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2925 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2926 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2927 Test(
2928 "tract.id,visit.id",
2929 "tract,visit",
2930 ((0, 1), (0, 1), (0, 2)),
2931 limit=(3,),
2932 ),
2933 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2934 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2935 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2936 Test(
2937 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2938 ),
2939 Test(
2940 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2941 ),
2942 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2943 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2944 Test(
2945 "tract,-timespan.begin,timespan.end",
2946 "tract,visit",
2947 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2948 ),
2949 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2950 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2951 Test(
2952 "tract,detector",
2953 "tract,detector",
2954 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2955 datasets="flat",
2956 collections="imported_r",
2957 ),
2958 Test(
2959 "tract,detector.full_name",
2960 "tract,detector",
2961 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2962 datasets="flat",
2963 collections="imported_r",
2964 ),
2965 Test(
2966 "tract,detector.raft,detector.name_in_raft",
2967 "tract,detector",
2968 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2969 datasets="flat",
2970 collections="imported_r",
2971 ),
2972 )
2974 for test in test_data:
2975 order_by = test.order_by.split(",")
2976 keys = test.keys.split(",")
2977 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2978 if test.limit is not None:
2979 query = query.limit(*test.limit)
2980 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2981 self.assertEqual(dataIds, test.result)
2983 # and materialize
2984 query = do_query(keys).order_by(*order_by)
2985 if test.limit is not None:
2986 query = query.limit(*test.limit)
2987 with self.assertRaises(RelationalAlgebraError):
2988 with query.materialize():
2989 pass
2991 # errors in a name
2992 for order_by in ("", "-"):
2993 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2994 list(do_query().order_by(order_by))
2996 for order_by in ("undimension.name", "-undimension.name"):
2997 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2998 list(do_query().order_by(order_by))
3000 for order_by in ("attract", "-attract"):
3001 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
3002 list(do_query().order_by(order_by))
3004 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
3005 list(do_query(("exposure", "visit")).order_by("exposure_time"))
3007 with self.assertRaisesRegex(
3008 ValueError,
3009 r"Timespan exists in more than one dimension element \(exposure, visit\); "
3010 r"qualify timespan with specific dimension name\.",
3011 ):
3012 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3014 with self.assertRaisesRegex(
3015 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3016 ):
3017 list(do_query("tract").order_by("timespan.begin"))
3019 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3020 list(do_query("tract").order_by("tract.timespan.begin"))
3022 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3023 list(do_query("tract").order_by("tract.name"))
3025 with self.assertRaisesRegex(
3026 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3027 ):
3028 list(do_query("visit").order_by("timestamp.begin"))
3030 def testQueryDataIdsGovernorExceptions(self):
3031 """Test exceptions raised by queryDataIds() for incorrect governors."""
3032 registry = self.makeRegistry()
3033 self.loadData(registry, "base.yaml")
3034 self.loadData(registry, "datasets.yaml")
3035 self.loadData(registry, "spatial.yaml")
3037 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3038 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3040 Test = namedtuple(
3041 "testQueryDataIdExceptionsTest",
3042 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3043 defaults=(None, None, None, {}, None, 0),
3044 )
3046 test_data = (
3047 Test("tract,visit", count=6),
3048 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3049 Test(
3050 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3051 ),
3052 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3053 Test(
3054 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3055 ),
3056 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3057 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3058 Test(
3059 "tract,visit",
3060 where="instrument=cam AND skymap=map",
3061 bind={"cam": "Cam1", "map": "SkyMap1"},
3062 count=6,
3063 ),
3064 Test(
3065 "tract,visit",
3066 where="instrument=cam AND skymap=map",
3067 bind={"cam": "Cam", "map": "SkyMap"},
3068 exception=DataIdValueError,
3069 ),
3070 )
3072 for test in test_data:
3073 dimensions = test.dimensions.split(",")
3074 if test.exception:
3075 with self.assertRaises(test.exception):
3076 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3077 else:
3078 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3079 self.assertEqual(query.count(discard=True), test.count)
3081 # and materialize
3082 if test.exception:
3083 with self.assertRaises(test.exception):
3084 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3085 with query.materialize() as materialized:
3086 materialized.count(discard=True)
3087 else:
3088 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3089 with query.materialize() as materialized:
3090 self.assertEqual(materialized.count(discard=True), test.count)
3092 def testQueryDimensionRecordsOrderBy(self):
3093 """Test order_by and limit on result returned by
3094 queryDimensionRecords().
3095 """
3096 registry = self.makeRegistry()
3097 self.loadData(registry, "base.yaml")
3098 self.loadData(registry, "datasets.yaml")
3099 self.loadData(registry, "spatial.yaml")
3101 def do_query(element, datasets=None, collections=None):
3102 return registry.queryDimensionRecords(
3103 element, instrument="Cam1", datasets=datasets, collections=collections
3104 )
3106 query = do_query("detector")
3107 self.assertEqual(len(list(query)), 4)
3109 Test = namedtuple(
3110 "testQueryDataIdsOrderByTest",
3111 ("element", "order_by", "result", "limit", "datasets", "collections"),
3112 defaults=(None, None, None),
3113 )
3115 test_data = (
3116 Test("detector", "detector", (1, 2, 3, 4)),
3117 Test("detector", "-detector", (4, 3, 2, 1)),
3118 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3119 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3120 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3121 Test("visit", "visit", (1, 2)),
3122 Test("visit", "-visit.id", (2, 1)),
3123 Test("visit", "zenith_angle", (1, 2)),
3124 Test("visit", "-visit.name", (2, 1)),
3125 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3126 )
3128 for test in test_data:
3129 order_by = test.order_by.split(",")
3130 query = do_query(test.element).order_by(*order_by)
3131 if test.limit is not None:
3132 query = query.limit(*test.limit)
3133 dataIds = tuple(rec.id for rec in query)
3134 self.assertEqual(dataIds, test.result)
3136 # errors in a name
3137 for order_by in ("", "-"):
3138 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3139 list(do_query("detector").order_by(order_by))
3141 for order_by in ("undimension.name", "-undimension.name"):
3142 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3143 list(do_query("detector").order_by(order_by))
3145 for order_by in ("attract", "-attract"):
3146 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3147 list(do_query("detector").order_by(order_by))
3149 for order_by in ("timestamp.begin", "-timestamp.begin"):
3150 with self.assertRaisesRegex(
3151 ValueError,
3152 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3153 r"perhaps you meant 'timespan.begin'\?",
3154 ):
3155 list(do_query("visit").order_by(order_by))
3157 def testQueryDimensionRecordsExceptions(self):
3158 """Test exceptions raised by queryDimensionRecords()."""
3159 registry = self.makeRegistry()
3160 self.loadData(registry, "base.yaml")
3161 self.loadData(registry, "datasets.yaml")
3162 self.loadData(registry, "spatial.yaml")
3164 result = registry.queryDimensionRecords("detector")
3165 self.assertEqual(result.count(), 4)
3166 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3167 self.assertEqual(result.count(), 4)
3168 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3169 self.assertEqual(result.count(), 4)
3170 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3171 self.assertEqual(result.count(), 4)
3172 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3173 self.assertEqual(result.count(), 4)
3175 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3176 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3177 result.count()
3179 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3180 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3181 result.count()
3183 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3184 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3185 result.count()
3187 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3188 result = registry.queryDimensionRecords(
3189 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3190 )
3191 result.count()
3193 def testDatasetConstrainedDimensionRecordQueries(self):
3194 """Test that queryDimensionRecords works even when given a dataset
3195 constraint whose dimensions extend beyond the requested dimension
3196 element's.
3197 """
3198 registry = self.makeRegistry()
3199 self.loadData(registry, "base.yaml")
3200 self.loadData(registry, "datasets.yaml")
3201 # Query for physical_filter dimension records, using a dataset that
3202 # has both physical_filter and dataset dimensions.
3203 records = registry.queryDimensionRecords(
3204 "physical_filter",
3205 datasets=["flat"],
3206 collections="imported_r",
3207 )
3208 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3209 # Trying to constrain by all dataset types is an error.
3210 with self.assertRaises(TypeError):
3211 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3213 def testSkyPixDatasetQueries(self):
3214 """Test that we can build queries involving skypix dimensions as long
3215 as a dataset type that uses those dimensions is included.
3216 """
3217 registry = self.makeRegistry()
3218 self.loadData(registry, "base.yaml")
3219 dataset_type = DatasetType(
3220 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3221 )
3222 registry.registerDatasetType(dataset_type)
3223 run = "r"
3224 registry.registerRun(run)
3225 # First try queries where there are no datasets; the concern is whether
3226 # we can even build and execute these queries without raising, even
3227 # when "doomed" query shortcuts are in play.
3228 self.assertFalse(
3229 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3230 )
3231 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3232 # Now add a dataset and see that we can get it back.
3233 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3234 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3235 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3236 self.assertEqual(
3237 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3238 {data_id},
3239 )
3240 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3242 def testDatasetIdFactory(self):
3243 """Simple test for DatasetIdFactory, mostly to catch potential changes
3244 in its API.
3245 """
3246 registry = self.makeRegistry()
3247 factory = DatasetIdFactory()
3248 dataset_type = DatasetType(
3249 "datasetType",
3250 dimensions=["detector", "instrument"],
3251 universe=registry.dimensions,
3252 storageClass="int",
3253 )
3254 run = "run"
3255 data_id = DataCoordinate.standardize(
3256 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
3257 )
3259 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3260 self.assertIsInstance(datasetId, uuid.UUID)
3261 self.assertEqual(datasetId.version, 4)
3263 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3264 self.assertIsInstance(datasetId, uuid.UUID)
3265 self.assertEqual(datasetId.version, 5)
3267 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3268 self.assertIsInstance(datasetId, uuid.UUID)
3269 self.assertEqual(datasetId.version, 5)
3271 def testExposureQueries(self):
3272 """Test query methods using arguments sourced from the exposure log
3273 service.
3275 The most complete test dataset currently available to daf_butler tests
3276 is hsc-rc2-subset.yaml export (which is unfortunately distinct from the
3277 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3278 dimension records as it was focused on providing nontrivial spatial
3279 overlaps between visit+detector and tract+patch. So in this test we
3280 need to translate queries that originally used the exposure dimension
3281 to use the (very similar) visit dimension instead.
3282 """
3283 registry = self.makeRegistry()
3284 self.loadData(registry, "hsc-rc2-subset.yaml")
3285 self.assertEqual(
3286 [
3287 record.id
3288 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3289 .order_by("id")
3290 .limit(5)
3291 ],
3292 [318, 322, 326, 330, 332],
3293 )
3294 self.assertEqual(
3295 [
3296 data_id["visit"]
3297 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3298 ],
3299 [318, 322, 326, 330, 332],
3300 )
3301 self.assertEqual(
3302 [
3303 record.id
3304 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3305 .order_by("full_name")
3306 .limit(5)
3307 ],
3308 [73, 72, 71, 70, 65],
3309 )
3310 self.assertEqual(
3311 [
3312 data_id["detector"]
3313 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3314 .order_by("full_name")
3315 .limit(5)
3316 ],
3317 [73, 72, 71, 70, 65],
3318 )
3320 def test_long_query_names(self) -> None:
3321 """Test that queries involving very long names are handled correctly.
3323 This is especially important for PostgreSQL, which truncates symbols
3324 longer than 64 chars, but it's worth testing for all DBs.
3325 """
3326 registry = self.makeRegistry()
3327 name = "abcd" * 17
3328 registry.registerDatasetType(
3329 DatasetType(
3330 name,
3331 dimensions=(),
3332 storageClass="Exposure",
3333 universe=registry.dimensions,
3334 )
3335 )
3336 # Need to search more than one collection actually containing a
3337 # matching dataset to avoid optimizations that sidestep bugs due to
3338 # truncation by making findFirst=True a no-op.
3339 run1 = "run1"
3340 registry.registerRun(run1)
3341 run2 = "run2"
3342 registry.registerRun(run2)
3343 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
3344 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
3345 self.assertEqual(
3346 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3347 {ref1},
3348 )
3350 def test_skypix_constraint_queries(self) -> None:
3351 """Test queries spatially constrained by a skypix data ID."""
3352 registry = self.makeRegistry()
3353 self.loadData(registry, "hsc-rc2-subset.yaml")
3354 patch_regions = {
3355 (data_id["tract"], data_id["patch"]): data_id.region
3356 for data_id in registry.queryDataIds(["patch"]).expanded()
3357 }
3358 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3359 # This check ensures the test doesn't become trivial due to a config
3360 # change; if it does, just pick a different HTML level.
3361 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3362 # Gather all skypix IDs that definitely overlap at least one of these
3363 # patches.
3364 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3365 for patch_region in patch_regions.values():
3366 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3367 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3368 # and does not overlap at least one other patch.
3369 for skypix_id in itertools.chain.from_iterable(
3370 range(begin, end) for begin, end in relevant_skypix_ids
3371 ):
3372 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3373 overlapping_patches = {
3374 patch_key
3375 for patch_key, patch_region in patch_regions.items()
3376 if not patch_region.isDisjointFrom(skypix_region)
3377 }
3378 if overlapping_patches and overlapping_patches != patch_regions.keys():
3379 break
3380 else:
3381 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3382 self.assertEqual(
3383 {
3384 (data_id["tract"], data_id["patch"])
3385 for data_id in registry.queryDataIds(
3386 ["patch"],
3387 dataId={skypix_dimension.name: skypix_id},
3388 )
3389 },
3390 overlapping_patches,
3391 )
3392 # Test that a three-way join that includes the common skypix system in
3393 # the dimensions doesn't generate redundant join terms in the query.
3394 full_data_ids = set(
3395 registry.queryDataIds(
3396 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3397 ).expanded()
3398 )
3399 self.assertGreater(len(full_data_ids), 0)
3400 for data_id in full_data_ids:
3401 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3402 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3404 def test_spatial_constraint_queries(self) -> None:
3405 """Test queries in which one spatial dimension in the constraint (data
3406 ID or ``where`` string) constrains a different spatial dimension in the
3407 query result columns.
3408 """
3409 registry = self.makeRegistry()
3410 self.loadData(registry, "hsc-rc2-subset.yaml")
3411 patch_regions = {
3412 (data_id["tract"], data_id["patch"]): data_id.region
3413 for data_id in registry.queryDataIds(["patch"]).expanded()
3414 }
3415 observation_regions = {
3416 (data_id["visit"], data_id["detector"]): data_id.region
3417 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3418 }
3419 all_combos = {
3420 (patch_key, observation_key)
3421 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3422 }
3423 overlapping_combos = {
3424 (patch_key, observation_key)
3425 for patch_key, observation_key in all_combos
3426 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3427 }
3428 # Check a direct spatial join with no constraint first.
3429 self.assertEqual(
3430 {
3431 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3432 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3433 },
3434 overlapping_combos,
3435 )
3436 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3437 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3438 for patch_key, observation_key in overlapping_combos:
3439 overlaps_by_patch[patch_key].add(observation_key)
3440 overlaps_by_observation[observation_key].add(patch_key)
3441 # Find patches and observations that overlap at least one of the other
3442 # but not all of the other.
3443 nontrivial_patch = next(
3444 iter(
3445 patch_key
3446 for patch_key, observation_keys in overlaps_by_patch.items()
3447 if observation_keys and observation_keys != observation_regions.keys()
3448 )
3449 )
3450 nontrivial_observation = next(
3451 iter(
3452 observation_key
3453 for observation_key, patch_keys in overlaps_by_observation.items()
3454 if patch_keys and patch_keys != patch_regions.keys()
3455 )
3456 )
3457 # Use the nontrivial patches and observations as constraints on the
3458 # other dimensions in various ways, first via a 'where' expression.
3459 # It's better in general to us 'bind' instead of f-strings, but these
3460 # all integers so there are no quoting concerns.
3461 self.assertEqual(
3462 {
3463 (data_id["visit"], data_id["detector"])
3464 for data_id in registry.queryDataIds(
3465 ["visit", "detector"],
3466 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3467 skymap="hsc_rings_v1",
3468 )
3469 },
3470 overlaps_by_patch[nontrivial_patch],
3471 )
3472 self.assertEqual(
3473 {
3474 (data_id["tract"], data_id["patch"])
3475 for data_id in registry.queryDataIds(
3476 ["patch"],
3477 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3478 instrument="HSC",
3479 )
3480 },
3481 overlaps_by_observation[nontrivial_observation],
3482 )
3483 # and then via the dataId argument.
3484 self.assertEqual(
3485 {
3486 (data_id["visit"], data_id["detector"])
3487 for data_id in registry.queryDataIds(
3488 ["visit", "detector"],
3489 dataId={
3490 "tract": nontrivial_patch[0],
3491 "patch": nontrivial_patch[1],
3492 },
3493 skymap="hsc_rings_v1",
3494 )
3495 },
3496 overlaps_by_patch[nontrivial_patch],
3497 )
3498 self.assertEqual(
3499 {
3500 (data_id["tract"], data_id["patch"])
3501 for data_id in registry.queryDataIds(
3502 ["patch"],
3503 dataId={
3504 "visit": nontrivial_observation[0],
3505 "detector": nontrivial_observation[1],
3506 },
3507 instrument="HSC",
3508 )
3509 },
3510 overlaps_by_observation[nontrivial_observation],
3511 )
3513 def test_query_projection_drop_postprocessing(self) -> None:
3514 """Test that projections and deduplications on query objects can
3515 drop post-query region filtering to ensure the query remains in
3516 the SQL engine.
3517 """
3518 registry = self.makeRegistry()
3519 self.loadData(registry, "base.yaml")
3520 self.loadData(registry, "spatial.yaml")
3522 def pop_transfer(tree: Relation) -> Relation:
3523 """If a relation tree terminates with a transfer to a new engine,
3524 return the relation prior to that transfer. If not, return the
3525 original relation.
3526 """
3527 match tree:
3528 case Transfer(target=target):
3529 return target
3530 case _:
3531 return tree
3533 # There's no public way to get a Query object yet, so we get one from a
3534 # DataCoordinateQueryResults private attribute. When a public API is
3535 # available this test should use it.
3536 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3537 # We expect this query to terminate in the iteration engine originally,
3538 # because region-filtering is necessary.
3539 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3540 # If we deduplicate, we usually have to do that downstream of the
3541 # filtering. That means the deduplication has to happen in the
3542 # iteration engine.
3543 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3544 # If we pass drop_postprocessing, we instead drop the region filtering
3545 # so the deduplication can happen in SQL (though there might still be
3546 # transfer to iteration at the tail of the tree that we can ignore;
3547 # that's what the pop_transfer takes care of here).
3548 self.assertIsInstance(
3549 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3550 sql.Engine,
3551 )
3553 def test_query_find_datasets_drop_postprocessing(self) -> None:
3554 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3555 problems with the FindFirstDataset relation operation.
3556 """
3557 # Setup: load some visit, tract, and patch records, and insert two
3558 # datasets with dimensions {visit, patch}, with one in each of two
3559 # RUN collections.
3560 registry = self.makeRegistry()
3561 self.loadData(registry, "base.yaml")
3562 self.loadData(registry, "spatial.yaml")
3563 storage_class = StorageClass("Warpy")
3564 registry.storageClasses.registerStorageClass(storage_class)
3565 dataset_type = DatasetType(
3566 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3567 )
3568 registry.registerDatasetType(dataset_type)
3569 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3570 registry.registerRun("run1")
3571 registry.registerRun("run2")
3572 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3573 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3574 # Query for the dataset using queryDataIds(...).findDatasets(...)
3575 # against only one of the two collections. This should work even
3576 # though the relation returned by queryDataIds ends with
3577 # iteration-engine region-filtering, because we can recognize before
3578 # running the query that there is only one collecton to search and
3579 # hence the (default) findFirst=True is irrelevant, and joining in the
3580 # dataset query commutes past the iteration-engine postprocessing.
3581 query1 = registry.queryDataIds(
3582 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3583 )
3584 self.assertEqual(
3585 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3586 {ref1},
3587 )
3588 # Query for the dataset using queryDataIds(...).findDatasets(...)
3589 # against both collections. This can only work if the FindFirstDataset
3590 # operation can be commuted past the iteration-engine options into SQL.
3591 query2 = registry.queryDataIds(
3592 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3593 )
3594 self.assertEqual(
3595 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3596 {ref2},
3597 )
3599 def test_query_empty_collections(self) -> None:
3600 """Test for registry query methods with empty collections. The methods
3601 should return empty result set (or None when applicable) and provide
3602 "doomed" diagnostics.
3603 """
3604 registry = self.makeRegistry()
3605 self.loadData(registry, "base.yaml")
3606 self.loadData(registry, "datasets.yaml")
3608 # Tests for registry.findDataset()
3609 with self.assertRaises(NoDefaultCollectionError):
3610 registry.findDataset("bias", instrument="Cam1", detector=1)
3611 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3612 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3614 # Tests for registry.queryDatasets()
3615 with self.assertRaises(NoDefaultCollectionError):
3616 registry.queryDatasets("bias")
3617 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3619 result = registry.queryDatasets("bias", collections=[])
3620 self.assertEqual(len(list(result)), 0)
3621 messages = list(result.explain_no_results())
3622 self.assertTrue(messages)
3623 self.assertTrue(any("because collection list is empty" in message for message in messages))
3625 # Tests for registry.queryDataIds()
3626 with self.assertRaises(NoDefaultCollectionError):
3627 registry.queryDataIds("detector", datasets="bias")
3628 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3630 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3631 self.assertEqual(len(list(result)), 0)
3632 messages = list(result.explain_no_results())
3633 self.assertTrue(messages)
3634 self.assertTrue(any("because collection list is empty" in message for message in messages))
3636 # Tests for registry.queryDimensionRecords()
3637 with self.assertRaises(NoDefaultCollectionError):
3638 registry.queryDimensionRecords("detector", datasets="bias")
3639 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3641 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3642 self.assertEqual(len(list(result)), 0)
3643 messages = list(result.explain_no_results())
3644 self.assertTrue(messages)
3645 self.assertTrue(any("because collection list is empty" in message for message in messages))
3647 def test_dataset_followup_spatial_joins(self) -> None:
3648 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3649 is involved.
3650 """
3651 registry = self.makeRegistry()
3652 self.loadData(registry, "base.yaml")
3653 self.loadData(registry, "spatial.yaml")
3654 pvi_dataset_type = DatasetType(
3655 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3656 )
3657 registry.registerDatasetType(pvi_dataset_type)
3658 collection = "datasets"
3659 registry.registerRun(collection)
3660 (pvi1,) = registry.insertDatasets(
3661 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3662 )
3663 (pvi2,) = registry.insertDatasets(
3664 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3665 )
3666 (pvi3,) = registry.insertDatasets(
3667 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3668 )
3669 self.assertEqual(
3670 set(
3671 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3672 .expanded()
3673 .findRelatedDatasets("pvi", [collection])
3674 ),
3675 {
3676 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3677 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3678 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3679 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3680 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3681 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3682 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3683 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3684 },
3685 )