# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy
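
# numpy is optional for these tests; test methods that need it are skipped
# below via `unittest.skipIf(np is None, ...)`.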
try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._named import NamedValueSet
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, DimensionGraph, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class; if a
    subclass provides a value for this member, it overrides the name specified
    in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters.  Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
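
    # A minimal sketch of how a concrete subclass might use these hooks; the
    # class and directory names below are illustrative, not the actual test
    # subclasses shipped with daf_butler:
    #
    #     class MyRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
    #         )
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")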

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
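
    # Typical use in a test method (this exact pattern recurs below):
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")
    #     self.loadData(registry, "datasets.yaml")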

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
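
    # For example, a caller can verify the rows and the count()/any()
    # code paths of a lazy result object in one call (names here are
    # illustrative):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected_data_ids,  # hypothetical list of DataCoordinate
    #     )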

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters.  SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
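        # Certifying attaches bias2 to the CALIBRATION collection with this
        # validity range; searches that should consider "Cam1/calib" must then
        # pass a timespan (see the no-timespan case below).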
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
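        # Register six runs (run0 through run5) used by the scenarios below.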
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
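                # The DATAID_TYPE* modes derive the ID from the dataset type
                # and data ID, so the result is a deterministic, name-based
                # (version 5) UUID rather than a random (version 4) one.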
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
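
        # The union of the RAW and CALEXP dimensions: data IDs queried over
        # this graph carry instrument, exposure, detector, and visit (see the
        # key checks below).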
        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1237 def testSpatialJoin(self):
1238 """Test queries that involve spatial overlap joins."""
1239 registry = self.makeRegistry()
1240 self.loadData(registry, "hsc-rc2-subset.yaml")
1242 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1243 # the TopologicalFamily they belong to. We'll relate all elements in
1244 # each family to all of the elements in each other family.
1245 families = defaultdict(set)
1246 # Dictionary of {element.name: {dataId: region}}.
1247 regions = {}
1248 for element in registry.dimensions.getDatabaseElements():
1249 if element.spatial is not None:
1250 families[element.spatial.name].add(element)
1251 regions[element.name] = {
1252 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1253 }
1255 # If this check fails, it's not necessarily a problem - it may just be
1256 # a reasonable change to the default dimension definitions - but the
1257 # test below depends on there being more than one family to do anything
1258 # useful.
1259 self.assertEqual(len(families), 2)
1261 # Overlap DatabaseDimensionElements with each other.
1262 for family1, family2 in itertools.combinations(families, 2):
1263 for element1, element2 in itertools.product(families[family1], families[family2]):
1264 graph = DimensionGraph.union(element1.graph, element2.graph)
1265 # Construct expected set of overlapping data IDs via a
1266 # brute-force comparison of the regions we've already fetched.
1267 expected = {
1268 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
1269 for (dataId1, region1), (dataId2, region2) in itertools.product(
1270 regions[element1.name].items(), regions[element2.name].items()
1271 )
1272 if not region1.isDisjointFrom(region2)
1273 }
1274 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
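# Asking queryDataIds for dimensions that span two different spatial
# families implicitly adds the overlap join between their regions; no
# explicit "where" clause is needed for that.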
1275 queried = set(registry.queryDataIds(graph))
1276 self.assertEqual(expected, queried)
1278 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1279 commonSkyPix = registry.dimensions.commonSkyPix
1280 for elementName, these_regions in regions.items():
1281 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1282 expected = set()
1283 for dataId, region in these_regions.items():
1284 for begin, end in commonSkyPix.pixelization.envelope(region):
1285 expected.update(
1286 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
1287 for index in range(begin, end)
1288 )
1289 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1290 queried = set(registry.queryDataIds(graph))
1291 self.assertEqual(expected, queried)
1293 def testAbstractQuery(self):
1294 """Test that we can run a query that just lists the known
1295 bands. This is tricky because band is
1296 backed by a query against physical_filter.
1297 """
1298 registry = self.makeRegistry()
1299 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1300 registry.insertDimensionData(
1301 "physical_filter",
1302 dict(instrument="DummyCam", name="dummy_i", band="i"),
1303 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1304 dict(instrument="DummyCam", name="dummy_r", band="r"),
1305 )
1306 rows = registry.queryDataIds(["band"]).toSet()
1307 self.assertCountEqual(
1308 rows,
1309 [
1310 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1311 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1312 ],
1313 )
1315 def testAttributeManager(self):
1316 """Test basic functionality of attribute manager."""
1317 # number of attributes with schema versions in a fresh database,
1318 # 6 managers with 2 records per manager, plus config for dimensions
1319 VERSION_COUNT = 6 * 2 + 1
1321 registry = self.makeRegistry()
1322 attributes = registry._managers.attributes
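# The attributes manager is a simple key-value store in the database;
# the pre-existing entries counted above hold schema versions and
# manager configuration.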
1324 # check what get() returns for non-existing key
1325 self.assertIsNone(attributes.get("attr"))
1326 self.assertEqual(attributes.get("attr", ""), "")
1327 self.assertEqual(attributes.get("attr", "Value"), "Value")
1328 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1330 # cannot store empty key or value
1331 with self.assertRaises(ValueError):
1332 attributes.set("", "value")
1333 with self.assertRaises(ValueError):
1334 attributes.set("attr", "")
1336 # set value of non-existing key
1337 attributes.set("attr", "value")
1338 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1339 self.assertEqual(attributes.get("attr"), "value")
1341 # update value of existing key
1342 with self.assertRaises(ButlerAttributeExistsError):
1343 attributes.set("attr", "value2")
1345 attributes.set("attr", "value2", force=True)
1346 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1347 self.assertEqual(attributes.get("attr"), "value2")
1349 # delete existing key
1350 self.assertTrue(attributes.delete("attr"))
1351 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1353 # delete non-existing key
1354 self.assertFalse(attributes.delete("non-attr"))
1356 # store a bunch of keys and get the list back
1357 data = [
1358 ("version.core", "1.2.3"),
1359 ("version.dimensions", "3.2.1"),
1360 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1361 ]
1362 for key, value in data:
1363 attributes.set(key, value)
1364 items = dict(attributes.items())
1365 for key, value in data:
1366 self.assertEqual(items[key], value)
1368 def testQueryDatasetsDeduplication(self):
1369 """Test that the findFirst option to queryDatasets selects datasets
1370 from collections in the order given.
1371 """
1372 registry = self.makeRegistry()
1373 self.loadData(registry, "base.yaml")
1374 self.loadData(registry, "datasets.yaml")
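# In datasets.yaml, "imported_g" has bias datasets for detectors 1-3
# and "imported_r" for detectors 2-4, so detectors 2 and 3 appear in
# both collections and exercise the find-first resolution below.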
1375 self.assertCountEqual(
1376 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1377 [
1378 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1379 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1380 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1381 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1382 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1383 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1384 ],
1385 )
1386 self.assertCountEqual(
1387 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1388 [
1389 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1390 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1391 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1392 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1393 ],
1394 )
1395 self.assertCountEqual(
1396 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1397 [
1398 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1399 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1400 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1401 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1402 ],
1403 )
1405 def testQueryResults(self):
1406 """Test querying for data IDs and then manipulating the QueryResults
1407 object returned to perform other queries.
1408 """
1409 registry = self.makeRegistry()
1410 self.loadData(registry, "base.yaml")
1411 self.loadData(registry, "datasets.yaml")
1412 bias = registry.getDatasetType("bias")
1413 flat = registry.getDatasetType("flat")
1414 # Obtain expected results from methods other than those we're testing
1415 # here. That includes:
1416 # - the dimensions of the data IDs we want to query:
1417 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1418 # - the dimensions of some other data IDs we'll extract from that:
1419 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1420 # - the data IDs we expect to obtain from the first queries:
1421 expectedDataIds = DataCoordinateSet(
1422 {
1423 DataCoordinate.standardize(
1424 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1425 )
1426 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1427 },
1428 graph=expectedGraph,
1429 hasFull=False,
1430 hasRecords=False,
1431 )
1432 # - the flat datasets we expect to find from those data IDs, in just
1433 # one collection (so deduplication is irrelevant):
1434 expectedFlats = [
1435 registry.findDataset(
1436 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1437 ),
1438 registry.findDataset(
1439 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1440 ),
1441 registry.findDataset(
1442 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1443 ),
1444 ]
1445 # - the data IDs we expect to extract from that:
1446 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1447 # - the bias datasets we expect to find from those data IDs, after we
1448 # subset-out the physical_filter dimension, both with duplicates:
1449 expectedAllBiases = [
1450 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1451 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1452 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1453 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1454 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1455 ]
1456 # - ...and without duplicates:
1457 expectedDeduplicatedBiases = [
1458 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1459 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1460 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1461 ]
1462 # Test against those expected results, using a "lazy" query for the
1463 # data IDs (which re-executes that query each time we use it to do
1464 # something new).
1465 dataIds = registry.queryDataIds(
1466 ["detector", "physical_filter"],
1467 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1468 instrument="Cam1",
1469 )
1470 self.assertEqual(dataIds.graph, expectedGraph)
1471 self.assertEqual(dataIds.toSet(), expectedDataIds)
1472 self.assertCountEqual(
1473 list(
1474 dataIds.findDatasets(
1475 flat,
1476 collections=["imported_r"],
1477 )
1478 ),
1479 expectedFlats,
1480 )
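# subset() projects the data IDs down to the detector-only dimensions;
# unique=True deduplicates the rows that differ only in the dropped
# physical_filter values.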
1481 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1482 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1483 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1484 self.assertCountEqual(
1485 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1486 expectedAllBiases,
1487 )
1488 self.assertCountEqual(
1489 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1490 expectedDeduplicatedBiases,
1491 )
1493 # Searching for a dataset with dimensions we had projected away
1494 # restores those dimensions.
1495 self.assertCountEqual(
1496 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1497 expectedFlats,
1498 )
1500 # Use a component dataset type.
1501 self.assertCountEqual(
1502 [
1503 ref.makeComponentRef("image")
1504 for ref in subsetDataIds.findDatasets(
1505 bias,
1506 collections=["imported_r", "imported_g"],
1507 findFirst=False,
1508 )
1509 ],
1510 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1511 )
1513 # Use a named dataset type that does not exist and a dataset type
1514 # object that does not exist.
1515 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1517 # Test both string name and dataset type object.
1518 test_type: str | DatasetType
1519 for test_type, test_type_name in (
1520 (unknown_type, unknown_type.name),
1521 (unknown_type.name, unknown_type.name),
1522 ):
1523 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1524 list(
1525 subsetDataIds.findDatasets(
1526 test_type, collections=["imported_r", "imported_g"], findFirst=True
1527 )
1528 )
1530 # Materialize the bias dataset queries (only) by putting the results
1531 # into temporary tables, then repeat those tests.
1532 with subsetDataIds.findDatasets(
1533 bias, collections=["imported_r", "imported_g"], findFirst=False
1534 ).materialize() as biases:
1535 self.assertCountEqual(list(biases), expectedAllBiases)
1536 with subsetDataIds.findDatasets(
1537 bias, collections=["imported_r", "imported_g"], findFirst=True
1538 ).materialize() as biases:
1539 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1540 # Materialize the data ID subset query, but not the dataset queries.
1541 with subsetDataIds.materialize() as subsetDataIds:
1542 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1543 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1544 self.assertCountEqual(
1545 list(
1546 subsetDataIds.findDatasets(
1547 bias, collections=["imported_r", "imported_g"], findFirst=False
1548 )
1549 ),
1550 expectedAllBiases,
1551 )
1552 self.assertCountEqual(
1553 list(
1554 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1555 ),
1556 expectedDeduplicatedBiases,
1557 )
1558 # Materialize the dataset queries, too.
1559 with subsetDataIds.findDatasets(
1560 bias, collections=["imported_r", "imported_g"], findFirst=False
1561 ).materialize() as biases:
1562 self.assertCountEqual(list(biases), expectedAllBiases)
1563 with subsetDataIds.findDatasets(
1564 bias, collections=["imported_r", "imported_g"], findFirst=True
1565 ).materialize() as biases:
1566 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1567 # Materialize the original query, but none of the follow-up queries.
1568 with dataIds.materialize() as dataIds:
1569 self.assertEqual(dataIds.graph, expectedGraph)
1570 self.assertEqual(dataIds.toSet(), expectedDataIds)
1571 self.assertCountEqual(
1572 list(
1573 dataIds.findDatasets(
1574 flat,
1575 collections=["imported_r"],
1576 )
1577 ),
1578 expectedFlats,
1579 )
1580 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1581 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1582 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1583 self.assertCountEqual(
1584 list(
1585 subsetDataIds.findDatasets(
1586 bias, collections=["imported_r", "imported_g"], findFirst=False
1587 )
1588 ),
1589 expectedAllBiases,
1590 )
1591 self.assertCountEqual(
1592 list(
1593 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1594 ),
1595 expectedDeduplicatedBiases,
1596 )
1597 # Materialize just the bias dataset queries.
1598 with subsetDataIds.findDatasets(
1599 bias, collections=["imported_r", "imported_g"], findFirst=False
1600 ).materialize() as biases:
1601 self.assertCountEqual(list(biases), expectedAllBiases)
1602 with subsetDataIds.findDatasets(
1603 bias, collections=["imported_r", "imported_g"], findFirst=True
1604 ).materialize() as biases:
1605 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1606 # Materialize the subset data ID query, but not the dataset
1607 # queries.
1608 with subsetDataIds.materialize() as subsetDataIds:
1609 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1610 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1611 self.assertCountEqual(
1612 list(
1613 subsetDataIds.findDatasets(
1614 bias, collections=["imported_r", "imported_g"], findFirst=False
1615 )
1616 ),
1617 expectedAllBiases,
1618 )
1619 self.assertCountEqual(
1620 list(
1621 subsetDataIds.findDatasets(
1622 bias, collections=["imported_r", "imported_g"], findFirst=True
1623 )
1624 ),
1625 expectedDeduplicatedBiases,
1626 )
1627 # Materialize the bias dataset queries, too, so now we're
1628 # materializing every single step.
1629 with subsetDataIds.findDatasets(
1630 bias, collections=["imported_r", "imported_g"], findFirst=False
1631 ).materialize() as biases:
1632 self.assertCountEqual(list(biases), expectedAllBiases)
1633 with subsetDataIds.findDatasets(
1634 bias, collections=["imported_r", "imported_g"], findFirst=True
1635 ).materialize() as biases:
1636 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1638 def testStorageClassPropagation(self):
1639 """Test that queries for datasets respect the storage class passed in
1640 as part of a full dataset type.
1641 """
1642 registry = self.makeRegistry()
1643 self.loadData(registry, "base.yaml")
1644 dataset_type_in_registry = DatasetType(
1645 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1646 )
1647 registry.registerDatasetType(dataset_type_in_registry)
1648 run = "run1"
1649 registry.registerRun(run)
1650 (inserted_ref,) = registry.insertDatasets(
1651 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1652 )
1653 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1654 query_dataset_type = DatasetType(
1655 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1656 )
1657 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
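# Each query flavor below is given the overridden dataset type; the
# refs that come back should carry its storage class rather than the
# one stored in the registry.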
1658 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1659 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1660 (query_datasets_ref,) = query_datasets_result
1661 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1662 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1663 query_dataset_type, collections=[run]
1664 )
1665 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1666 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1667 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1668 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1669 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1670 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1671 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1673 def testEmptyDimensionsQueries(self):
1674 """Test Query and QueryResults objects in the case where there are no
1675 dimensions.
1676 """
1677 # Set up test data: one dataset type, two runs, one dataset in each.
1678 registry = self.makeRegistry()
1679 self.loadData(registry, "base.yaml")
1680 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1681 registry.registerDatasetType(schema)
1682 dataId = DataCoordinate.makeEmpty(registry.dimensions)
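# A dataset type with no dimensions has exactly one possible data ID:
# the empty data coordinate.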
1683 run1 = "run1"
1684 run2 = "run2"
1685 registry.registerRun(run1)
1686 registry.registerRun(run2)
1687 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1688 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1689 # Query directly for both of the datasets, and each one, one at a time.
1690 self.checkQueryResults(
1691 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1692 )
1693 self.checkQueryResults(
1694 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1695 [dataset1],
1696 )
1697 self.checkQueryResults(
1698 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1699 [dataset2],
1700 )
1701 # Query for data IDs with no dimensions.
1702 dataIds = registry.queryDataIds([])
1703 self.checkQueryResults(dataIds, [dataId])
1704 # Use queried data IDs to find the datasets.
1705 self.checkQueryResults(
1706 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1707 [dataset1, dataset2],
1708 )
1709 self.checkQueryResults(
1710 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1711 [dataset1],
1712 )
1713 self.checkQueryResults(
1714 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1715 [dataset2],
1716 )
1717 # Now materialize the data ID query results and repeat those tests.
1718 with dataIds.materialize() as dataIds:
1719 self.checkQueryResults(dataIds, [dataId])
1720 self.checkQueryResults(
1721 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1722 [dataset1],
1723 )
1724 self.checkQueryResults(
1725 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1726 [dataset2],
1727 )
1728 # Query for non-empty data IDs, then subset that to get the empty one.
1729 # Repeat the above tests starting from that.
1730 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1731 self.checkQueryResults(dataIds, [dataId])
1732 self.checkQueryResults(
1733 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1734 [dataset1, dataset2],
1735 )
1736 self.checkQueryResults(
1737 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1738 [dataset1],
1739 )
1740 self.checkQueryResults(
1741 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1742 [dataset2],
1743 )
1744 with dataIds.materialize() as dataIds:
1745 self.checkQueryResults(dataIds, [dataId])
1746 self.checkQueryResults(
1747 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1748 [dataset1, dataset2],
1749 )
1750 self.checkQueryResults(
1751 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1752 [dataset1],
1753 )
1754 self.checkQueryResults(
1755 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1756 [dataset2],
1757 )
1758 # Query for non-empty data IDs, then materialize, then subset to get
1759 # the empty one. Repeat again.
1760 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1761 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1762 self.checkQueryResults(dataIds, [dataId])
1763 self.checkQueryResults(
1764 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1765 [dataset1, dataset2],
1766 )
1767 self.checkQueryResults(
1768 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1769 [dataset1],
1770 )
1771 self.checkQueryResults(
1772 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1773 [dataset2],
1774 )
1775 with dataIds.materialize() as dataIds:
1776 self.checkQueryResults(dataIds, [dataId])
1777 self.checkQueryResults(
1778 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1779 [dataset1, dataset2],
1780 )
1781 self.checkQueryResults(
1782 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1783 [dataset1],
1784 )
1785 self.checkQueryResults(
1786 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1787 [dataset2],
1788 )
1789 # Query for non-empty data IDs with a constraint on an empty-data-ID
1790 # dataset that exists.
1791 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1792 self.checkQueryResults(
1793 dataIds.subset(unique=True),
1794 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1795 )
1796 # Again query for non-empty data IDs with a constraint on empty-data-ID
1797 # datasets, but when the datasets don't exist. We delete the existing
1798 # dataset and query just that collection rather than creating a new
1799 # empty collection because this is a bit less likely for our build-time
1800 # logic to shortcut-out (via the collection summaries), and such a
1801 # shortcut would make this test a bit more trivial than we'd like.
1802 registry.removeDatasets([dataset2])
1803 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1804 self.checkQueryResults(dataIds, [])
1806 def testDimensionDataModifications(self):
1807 """Test that modifying dimension records via:
1808 syncDimensionData(..., update=True) and
1809 insertDimensionData(..., replace=True) works as expected, even in the
1810 presence of datasets using those dimensions and spatial overlap
1811 relationships.
1812 """
1814 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1815 """Unpack a sphgeom.RangeSet into the integers it contains."""
1816 for begin, end in ranges:
1817 yield from range(begin, end)
1819 def range_set_hull(
1820 ranges: lsst.sphgeom.RangeSet,
1821 pixelization: lsst.sphgeom.HtmPixelization,
1822 ) -> lsst.sphgeom.ConvexPolygon:
1823 """Create a ConvexPolygon hull of the region defined by a set of
1824 HTM pixelization index ranges.
1825 """
1826 points = []
1827 for index in unpack_range_set(ranges):
1828 points.extend(pixelization.triangle(index).getVertices())
1829 return lsst.sphgeom.ConvexPolygon(points)
1831 # Use HTM to set up an initial parent region (one arbitrary trixel)
1832 # and four child regions (the trixels within the parent at the next
1833 # level). We'll use the parent as a tract/visit region and the children
1834 # as its patch/visit_detector regions.
1835 registry = self.makeRegistry()
1836 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1837 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1838 index = 12288
1839 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
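# In HTM the children of trixel i at the next level are trixels 4*i
# through 4*i + 3, so scaling this single-index range by 4 yields
# exactly the four child indices (12288 -> 49152..49155).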
1840 assert htm6.universe().contains(child_ranges_small)
1841 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1842 parent_region_small = lsst.sphgeom.ConvexPolygon(
1843 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1844 )
1845 assert all(parent_region_small.contains(c) for c in child_regions_small)
1846 # Make a larger version of each child region, defined as the convex hull
1847 # of the htm6 trixels that overlap the original's bounding circle. Make a new
1848 # parent that's the convex hull of the new children.
1849 child_regions_large = [
1850 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1851 ]
1852 assert all(
1853 large.contains(small)
1854 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1855 )
1856 parent_region_large = lsst.sphgeom.ConvexPolygon(
1857 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1858 )
1859 assert all(parent_region_large.contains(c) for c in child_regions_large)
1860 assert parent_region_large.contains(parent_region_small)
1861 assert not parent_region_small.contains(parent_region_large)
1862 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1863 # Find some commonSkyPix indices that overlap the large regions but do
1864 # not overlap the small regions. We use commonSkyPix here to make sure the
1865 # real tests later involve what's in the database, not just post-query
1866 # filtering of regions.
1867 child_difference_indices = []
1868 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1869 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1870 assert difference, "if this is empty, we can't test anything useful with these regions"
1871 assert all(
1872 not commonSkyPix.triangle(d).isDisjointFrom(large)
1873 and commonSkyPix.triangle(d).isDisjointFrom(small)
1874 for d in difference
1875 )
1876 child_difference_indices.append(difference)
1877 parent_difference_indices = list(
1878 unpack_range_set(
1879 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1880 )
1881 )
1882 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1883 assert all(
1884 (
1885 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1886 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1887 )
1888 for d in parent_difference_indices
1889 )
1890 # Now that we've finally got those regions, we'll insert the large ones
1891 # as tract/patch dimension records.
1892 skymap_name = "testing_v1"
1893 registry.insertDimensionData(
1894 "skymap",
1895 {
1896 "name": skymap_name,
1897 "hash": bytes([42]),
1898 "tract_max": 1,
1899 "patch_nx_max": 2,
1900 "patch_ny_max": 2,
1901 },
1902 )
1903 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1904 registry.insertDimensionData(
1905 "patch",
1906 *[
1907 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1908 for n, c in enumerate(child_regions_large)
1909 ],
1910 )
1911 # Add a dataset that uses these dimensions to make sure that modifying
1912 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1913 # implement insert with replace=True as delete-then-insert).
1914 dataset_type = DatasetType(
1915 "coadd",
1916 dimensions=["tract", "patch"],
1917 universe=registry.dimensions,
1918 storageClass="Exposure",
1919 )
1920 registry.registerDatasetType(dataset_type)
1921 registry.registerCollection("the_run", CollectionType.RUN)
1922 registry.insertDatasets(
1923 dataset_type,
1924 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1925 run="the_run",
1926 )
1927 # Query for tracts and patches that overlap some "difference"
1928 # commonSkyPix pixels; there should be overlaps, because the database
1929 # has the "large" suite of regions.
1930 self.assertEqual(
1931 {0},
1932 {
1933 data_id["tract"]
1934 for data_id in registry.queryDataIds(
1935 ["tract"],
1936 skymap=skymap_name,
1937 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1938 )
1939 },
1940 )
1941 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1942 self.assertIn(
1943 patch_id,
1944 {
1945 data_id["patch"]
1946 for data_id in registry.queryDataIds(
1947 ["patch"],
1948 skymap=skymap_name,
1949 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1950 )
1951 },
1952 )
1953 # Use sync to update the tract region and insert to update the regions
1954 # of the patches, to the "small" suite.
1955 updated = registry.syncDimensionData(
1956 "tract",
1957 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1958 update=True,
1959 )
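# With update=True, syncDimensionData reports what changed as a dict
# mapping each updated field to its *previous* value.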
1960 self.assertEqual(updated, {"region": parent_region_large})
1961 registry.insertDimensionData(
1962 "patch",
1963 *[
1964 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1965 for n, c in enumerate(child_regions_small)
1966 ],
1967 replace=True,
1968 )
1969 # Query again; there now should be no such overlaps, because the
1970 # database has the "small" suite of regions.
1971 self.assertFalse(
1972 set(
1973 registry.queryDataIds(
1974 ["tract"],
1975 skymap=skymap_name,
1976 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1977 )
1978 )
1979 )
1980 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1981 self.assertNotIn(
1982 patch_id,
1983 {
1984 data_id["patch"]
1985 for data_id in registry.queryDataIds(
1986 ["patch"],
1987 skymap=skymap_name,
1988 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1989 )
1990 },
1991 )
1992 # Update back to the large regions and query one more time.
1993 updated = registry.syncDimensionData(
1994 "tract",
1995 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1996 update=True,
1997 )
1998 self.assertEqual(updated, {"region": parent_region_small})
1999 registry.insertDimensionData(
2000 "patch",
2001 *[
2002 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2003 for n, c in enumerate(child_regions_large)
2004 ],
2005 replace=True,
2006 )
2007 self.assertEqual(
2008 {0},
2009 {
2010 data_id["tract"]
2011 for data_id in registry.queryDataIds(
2012 ["tract"],
2013 skymap=skymap_name,
2014 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2015 )
2016 },
2017 )
2018 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2019 self.assertIn(
2020 patch_id,
2021 {
2022 data_id["patch"]
2023 for data_id in registry.queryDataIds(
2024 ["patch"],
2025 skymap=skymap_name,
2026 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2027 )
2028 },
2029 )
2031 def testCalibrationCollections(self):
2032 """Test operations on `~CollectionType.CALIBRATION` collections,
2033 including `Registry.certify`, `Registry.decertify`,
2034 `Registry.findDataset`, and
2035 `DataCoordinateQueryResults.findRelatedDatasets`.
2036 """
2037 # Setup - make a Registry, fill it with some datasets in
2038 # non-calibration collections.
2039 registry = self.makeRegistry()
2040 self.loadData(registry, "base.yaml")
2041 self.loadData(registry, "datasets.yaml")
2042 # Set up some timestamps.
2043 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2044 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2045 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2046 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2047 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2048 allTimespans = [
2049 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2050 ]
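# Listing None at both ends means the pairwise combinations include
# half-unbounded timespans like (None, t3) and (t2, None) as well as
# the fully unbounded (None, None).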
2051 # Insert some exposure records with timespans between each sequential
2052 # pair of those.
2053 registry.insertDimensionData(
2054 "exposure",
2055 {
2056 "instrument": "Cam1",
2057 "id": 0,
2058 "obs_id": "zero",
2059 "physical_filter": "Cam1-G",
2060 "timespan": Timespan(t1, t2),
2061 },
2062 {
2063 "instrument": "Cam1",
2064 "id": 1,
2065 "obs_id": "one",
2066 "physical_filter": "Cam1-G",
2067 "timespan": Timespan(t2, t3),
2068 },
2069 {
2070 "instrument": "Cam1",
2071 "id": 2,
2072 "obs_id": "two",
2073 "physical_filter": "Cam1-G",
2074 "timespan": Timespan(t3, t4),
2075 },
2076 {
2077 "instrument": "Cam1",
2078 "id": 3,
2079 "obs_id": "three",
2080 "physical_filter": "Cam1-G",
2081 "timespan": Timespan(t4, t5),
2082 },
2083 )
2084 # Get references to some datasets.
2085 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2086 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2087 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2088 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2089 # Register the main calibration collection we'll be working with.
2090 collection = "Cam1/calibs/default"
2091 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2092 # Cannot associate into a calibration collection (no timespan).
2093 with self.assertRaises(CollectionTypeError):
2094 registry.associate(collection, [bias2a])
2095 # Certify 2a dataset with [t2, t4) validity.
2096 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2097 # Test that we can query for this dataset via the new collection, both
2098 # on its own and with a RUN collection.
2099 self.assertEqual(
2100 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2101 {bias2a},
2102 )
2103 self.assertEqual(
2104 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2105 {
2106 bias2a,
2107 bias2b,
2108 bias3b,
2109 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2110 },
2111 )
2112 self.assertEqual(
2113 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2114 {registry.expandDataId(instrument="Cam1", detector=2)},
2115 )
2116 self.assertEqual(
2117 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2118 {
2119 registry.expandDataId(instrument="Cam1", detector=2),
2120 registry.expandDataId(instrument="Cam1", detector=3),
2121 registry.expandDataId(instrument="Cam1", detector=4),
2122 },
2123 )
2124 self.assertEqual(
2125 set(
2126 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2127 "bias", findFirst=True, collections=[collection]
2128 )
2129 ),
2130 {
2131 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2132 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2133 },
2134 )
2135 self.assertEqual(
2136 set(
2137 registry.queryDataIds(
2138 ["exposure", "detector"], instrument="Cam1", detector=2
2139 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2140 ),
2141 {
2142 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2143 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2144 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2145 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2146 },
2147 )
2149 # We should not be able to certify 2b with anything overlapping that
2150 # window.
2151 with self.assertRaises(ConflictingDefinitionError):
2152 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2153 with self.assertRaises(ConflictingDefinitionError):
2154 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2155 with self.assertRaises(ConflictingDefinitionError):
2156 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2157 with self.assertRaises(ConflictingDefinitionError):
2158 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2159 with self.assertRaises(ConflictingDefinitionError):
2160 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2161 with self.assertRaises(ConflictingDefinitionError):
2162 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2163 with self.assertRaises(ConflictingDefinitionError):
2164 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2165 with self.assertRaises(ConflictingDefinitionError):
2166 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2167 # We should be able to certify 3a with a range overlapping that window,
2168 # because it's for a different detector.
2169 # We'll certify 3a over [t1, t3).
2170 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2171 # Now we'll certify 2b and 3b together over [t4, ∞).
2172 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2174 # Fetch all associations and check that they are what we expect.
2175 self.assertCountEqual(
2176 list(
2177 registry.queryDatasetAssociations(
2178 "bias",
2179 collections=[collection, "imported_g", "imported_r"],
2180 )
2181 ),
2182 [
2183 DatasetAssociation(
2184 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2185 collection="imported_g",
2186 timespan=None,
2187 ),
2188 DatasetAssociation(
2189 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2190 collection="imported_r",
2191 timespan=None,
2192 ),
2193 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2194 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2195 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2196 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2197 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2198 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2199 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2200 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2201 ],
2202 )
2204 class Ambiguous:
2205 """Tag class to denote lookups that should be ambiguous."""
2207 pass
2209 def assertLookup(
2210 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2211 ) -> None:
2212 """Local function that asserts that a bias lookup returns the given
2213 expected result.
2214 """
2215 if expected is Ambiguous:
2216 with self.assertRaises((DatasetTypeError, LookupError)):
2217 registry.findDataset(
2218 "bias",
2219 collections=collection,
2220 instrument="Cam1",
2221 detector=detector,
2222 timespan=timespan,
2223 )
2224 else:
2225 self.assertEqual(
2226 expected,
2227 registry.findDataset(
2228 "bias",
2229 collections=collection,
2230 instrument="Cam1",
2231 detector=detector,
2232 timespan=timespan,
2233 ),
2234 )
2236 # Systematically test lookups against expected results.
2237 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2238 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2239 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2240 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2241 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2242 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2243 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2244 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2245 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2246 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2247 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2248 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2249 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2250 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2251 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2252 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2253 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2254 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2255 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2256 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2257 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2258 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2259 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2260 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2261 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2262 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2263 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2264 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2265 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2266 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2267 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2268 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2269 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2270 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2271 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2272 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2273 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2274 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2275 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2276 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2277 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2278 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2280 # Test lookups via temporal joins to exposures.
2281 self.assertEqual(
2282 set(
2283 registry.queryDataIds(
2284 ["exposure", "detector"], instrument="Cam1", detector=2
2285 ).findRelatedDatasets("bias", collections=[collection])
2286 ),
2287 {
2288 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2289 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2290 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2291 },
2292 )
2293 self.assertEqual(
2294 set(
2295 registry.queryDataIds(
2296 ["exposure", "detector"], instrument="Cam1", detector=3
2297 ).findRelatedDatasets("bias", collections=[collection])
2298 ),
2299 {
2300 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2301 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2302 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2303 },
2304 )
2305 self.assertEqual(
2306 set(
2307 registry.queryDataIds(
2308 ["exposure", "detector"], instrument="Cam1", detector=2
2309 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2310 ),
2311 {
2312 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2313 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2314 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2315 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2316 },
2317 )
2318 self.assertEqual(
2319 set(
2320 registry.queryDataIds(
2321 ["exposure", "detector"], instrument="Cam1", detector=3
2322 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2323 ),
2324 {
2325 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2326 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2327 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2328 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2329 },
2330 )
2332 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2333 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2334 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2335 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2336 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2337 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2338 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2339 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2340 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2341 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2342 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2343 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2344 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2345 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2346 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2347 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2348 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2349 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2350 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2351 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2352 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2353 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2354 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2355 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2356 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2357 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2358 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2359 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2360 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2361 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2362 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2363 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2364 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2365 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2366 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2367 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2368 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2369 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2370 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2371 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2372 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2373 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2374 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2375 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2376 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2377 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2379 # Decertify everything, this time with explicit data IDs, then check
2380 # that no lookups succeed.
2381 registry.decertify(
2382 collection,
2383 "bias",
2384 Timespan(None, None),
2385 dataIds=[
2386 dict(instrument="Cam1", detector=2),
2387 dict(instrument="Cam1", detector=3),
2388 ],
2389 )
2390 for detector in (2, 3):
2391 for timespan in allTimespans:
2392 assertLookup(detector=detector, timespan=timespan, expected=None)
2393 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2394 # those.
2395 registry.certify(
2396 collection,
2397 [bias2a, bias3a],
2398 Timespan(None, None),
2399 )
2400 for timespan in allTimespans:
2401 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2402 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2403 # Decertify just bias2a over [t2, t4).
2404 # This should split a single certification row into two (and leave the
2405 # other existing row, for bias3a, alone).
2406 registry.decertify(
2407 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2408 )
2409 for timespan in allTimespans:
2410 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2411 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2412 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2413 if overlapsBefore and overlapsAfter:
2414 expected = Ambiguous
2415 elif overlapsBefore or overlapsAfter:
2416 expected = bias2a
2417 else:
2418 expected = None
2419 assertLookup(detector=2, timespan=timespan, expected=expected)
2421 def testSkipCalibs(self):
2422 """Test how queries handle skipping of calibration collections."""
2423 registry = self.makeRegistry()
2424 self.loadData(registry, "base.yaml")
2425 self.loadData(registry, "datasets.yaml")
2427 coll_calib = "Cam1/calibs/default"
2428 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2430 # Add all biases to the calibration collection.
2431 # Without this, the logic that prunes dataset subqueries based on
2432 # datasetType-collection summary information will fire before the logic
2433 # we want to test below. This is a good thing (it avoids the dreaded
2434 # NotImplementedError a bit more often) everywhere but here.
2435 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2437 coll_list = [coll_calib, "imported_g", "imported_r"]
2438 chain = "Cam1/chain"
2439 registry.registerCollection(chain, type=CollectionType.CHAINED)
2440 registry.setCollectionChain(chain, coll_list)
2442 # explicit list will raise if findFirst=True or there are temporal
2443 # dimensions
2444 with self.assertRaises(NotImplementedError):
2445 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2446 with self.assertRaises(NotImplementedError):
2447 registry.queryDataIds(
2448 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2449 ).count()
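# (queryDataIds is lazy, so the count() call above is what actually
# executes the query and lets the error surface.)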
2451 # chain will skip
2452 datasets = list(registry.queryDatasets("bias", collections=chain))
2453 self.assertGreater(len(datasets), 0)
2455 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2456 self.assertGreater(len(dataIds), 0)
2458 # glob will skip too
2459 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2460 self.assertGreater(len(datasets), 0)
2462 # regular expression will skip too
2463 pattern = re.compile(".*")
2464 datasets = list(registry.queryDatasets("bias", collections=pattern))
2465 self.assertGreater(len(datasets), 0)
2467 # ellipsis should work as usual
2468 datasets = list(registry.queryDatasets("bias", collections=...))
2469 self.assertGreater(len(datasets), 0)
2471 # a few tests with findFirst
2472 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2473 self.assertGreater(len(datasets), 0)
2475 def testIngestTimeQuery(self):
2476 registry = self.makeRegistry()
2477 self.loadData(registry, "base.yaml")
2478 dt0 = datetime.utcnow()
2479 self.loadData(registry, "datasets.yaml")
2480 dt1 = datetime.utcnow()
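# dt0 and dt1 bracket the ingest times of everything loaded from
# datasets.yaml; the precision checks below depend on that.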
2482 datasets = list(registry.queryDatasets(..., collections=...))
2483 len0 = len(datasets)
2484 self.assertGreater(len0, 0)
2486 where = "ingest_date > T'2000-01-01'"
2487 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2488 len1 = len(datasets)
2489 self.assertEqual(len0, len1)
2491 # no one will ever use this piece of software in 30 years
2492 where = "ingest_date > T'2050-01-01'"
2493 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2494 len2 = len(datasets)
2495 self.assertEqual(len2, 0)
2497 # Check more exact timing to make sure there is no 37-second offset
2498 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2499 # sure that we don't test with higher precision.
2500 tests = [
2501 # format: (timestamp, operator, expected_len)
2502 (dt0 - timedelta(seconds=1), ">", len0),
2503 (dt0 - timedelta(seconds=1), "<", 0),
2504 (dt1 + timedelta(seconds=1), "<", len0),
2505 (dt1 + timedelta(seconds=1), ">", 0),
2506 ]
2507 for dt, op, expect_len in tests:
2508 dt_str = dt.isoformat(sep=" ")
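# Inline the timestamp into the expression as a T'...' literal.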
2510 where = f"ingest_date {op} T'{dt_str}'"
2511 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2512 self.assertEqual(len(datasets), expect_len)
2514 # same with bind using datetime or astropy Time
2515 where = f"ingest_date {op} ingest_time"
2516 datasets = list(
2517 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2518 )
2519 self.assertEqual(len(datasets), expect_len)
2521 dt_astropy = astropy.time.Time(dt, format="datetime")
2522 datasets = list(
2523 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2524 )
2525 self.assertEqual(len(datasets), expect_len)
2527 def testTimespanQueries(self):
2528 """Test query expressions involving timespans."""
2529 registry = self.makeRegistry()
2530 self.loadData(registry, "hsc-rc2-subset.yaml")
2531 # All visits in the database; mapping from ID to timespan.
2532 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2533 # Just those IDs, sorted (which is also temporal sorting, because HSC
2534 # visit IDs are monotonically increasing).
2535 ids = sorted(visits.keys())
2536 self.assertGreater(len(ids), 20)
2537 # Pick some quasi-random indexes into `ids` to play with.
2538 i1 = int(len(ids) * 0.1)
2539 i2 = int(len(ids) * 0.3)
2540 i3 = int(len(ids) * 0.6)
2541 i4 = int(len(ids) * 0.8)
2542 # Extract some times from those: just before the beginning of i1 (which
2543 # should be after the end of the previous visit), exactly the
2544 # beginning of i2, just after the beginning of i3 (and before its end),
2545 # and the exact end of i4.
2546 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2547 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2548 t2 = visits[ids[i2]].begin
2549 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2550 self.assertLess(t3, visits[ids[i3]].end)
2551 t4 = visits[ids[i4]].end
2552 # Make sure those are actually in order.
2553 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2555 bind = {
2556 "t1": t1,
2557 "t2": t2,
2558 "t3": t3,
2559 "t4": t4,
2560 "ts23": Timespan(t2, t3),
2561 }
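# bind maps the identifiers used in the "where" strings below to
# Python values, so times and timespans don't have to be formatted
# into the expression text (except where the T'...' literal syntax is
# itself under test).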
2563 def query(where):
2564 """Return results as a sorted, deduplicated list of visit IDs."""
2565 return sorted(
2566 {
2567 dataId["visit"]
2568 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2569 }
2570 )
2572 # Try a bunch of timespan queries, mixing up the bounds themselves,
2573 # where they appear in the expression, and how we get the timespan into
2574 # the expression.
2576 # t1 is before the start of i1, so this should not include i1.
2577 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2578 # t2 is exactly at the start of i2, but ends are exclusive, so these
2579 # should not include i2.
2580 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2581 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2582 # t3 is in the middle of i3, so this should include i3.
2583 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2584 # This one should not include i3 by the same reasoning.
2585 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2586 # t4 is exactly at the end of i4, so this should include i4.
2587 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2588 # i4's upper bound of t4 is exclusive, so this should not include i4.
2589 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2591 # Now some timespan vs. time scalar queries.
2592 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2593 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2594 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2595 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2596 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2597 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2599 # Empty timespans should not overlap anything.
2600 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
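A condensed sketch of the overlap idioms above; `registry`, `t_start`, and `t_stop` (astropy Time values) are assumed names, and `Timespan` is the class imported at the top of this module.

# Timespans are half-open: begins are inclusive, ends are exclusive.
span = Timespan(t_start, t_stop)
overlapping = registry.queryDataIds(
    "visit", instrument="HSC", where="visit.timespan OVERLAPS ts", bind={"ts": span}
)
# A scalar time also works: visits whose timespan contains t_start.
containing = registry.queryDataIds(
    "visit", instrument="HSC", where="visit.timespan OVERLAPS t", bind={"t": t_start}
)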
2602 def testCollectionSummaries(self):
2603 """Test recording and retrieval of collection summaries."""
2604 self.maxDiff = None
2605 registry = self.makeRegistry()
2606 # Importing datasets from yaml should go through the code path where
2607 # we update collection summaries as we insert datasets.
2608 self.loadData(registry, "base.yaml")
2609 self.loadData(registry, "datasets.yaml")
2610 flat = registry.getDatasetType("flat")
2611 expected1 = CollectionSummary()
2612 expected1.dataset_types.add(registry.getDatasetType("bias"))
2613 expected1.add_data_ids(
2614 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2615 )
2616 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2617 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2618 # Create a chained collection with both of the imported runs; the
2619 # summary should be the same, because it's a union with itself.
2620 chain = "chain"
2621 registry.registerCollection(chain, CollectionType.CHAINED)
2622 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2623 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2624 # Associate flats only into a tagged collection and a calibration
2625 # collection to check summaries of those.
2626 tag = "tag"
2627 registry.registerCollection(tag, CollectionType.TAGGED)
2628 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2629 calibs = "calibs"
2630 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2631 registry.certify(
2632 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2633 )
2634 expected2 = expected1.copy()
2635 expected2.dataset_types.discard("bias")
2636 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2637 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2638 # Explicitly calling Registry.refresh() should load those same
2639 # summaries, via a totally different code path.
2640 registry.refresh()
2641 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2642 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2643 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2644 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
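A usage sketch for the summary API (hedged: `registry` is assumed, and the attribute access mirrors what the assertions above rely on).

# Summaries are conservative metadata: the dataset types and governor
# values a collection *may* contain, retrievable without a table scan.
summary = registry.getCollectionSummary("imported_g")
print(summary.dataset_types)  # includes "bias" and "flat" above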
2646 def testBindInQueryDatasets(self):
2647 """Test that the bind parameter is correctly forwarded in
2648 queryDatasets recursion.
2649 """
2650 registry = self.makeRegistry()
2651 # Load the standard test data; the assertion below only exercises
2652 # bind-parameter forwarding in queryDatasets.
2653 self.loadData(registry, "base.yaml")
2654 self.loadData(registry, "datasets.yaml")
2655 self.assertEqual(
2656 set(registry.queryDatasets("flat", band="r", collections=...)),
2657 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2658 )
2660 def testQueryIntRangeExpressions(self):
2661 """Test integer range expressions in ``where`` arguments.
2663 Note that our expressions use inclusive stop values, unlike Python's.
2664 """
2665 registry = self.makeRegistry()
2666 self.loadData(registry, "base.yaml")
2667 self.assertEqual(
2668 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2669 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2670 )
2671 self.assertEqual(
2672 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2673 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2674 )
2675 self.assertEqual(
2676 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2677 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2678 )
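A sketch of the range syntax (assuming `registry`): `a..b` is inclusive at both ends, and `:s` adds a stride.

# detector IN (1..4:2) matches detectors 1 and 3 (the stop value 4 is
# in the inclusive range, but the stride of 2 steps over it).
data_ids = registry.queryDataIds(
    ["detector"], instrument="Cam1", where="detector IN (1..4:2)"
)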
2680 def testQueryResultSummaries(self):
2681 """Test summary methods like `count`, `any`, and `explain_no_results`
2682 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2683 """
2684 registry = self.makeRegistry()
2685 self.loadData(registry, "base.yaml")
2686 self.loadData(registry, "datasets.yaml")
2687 self.loadData(registry, "spatial.yaml")
2688 # Default test dataset has two collections, each with both flats and
2689 # biases. Add a new collection with only biases.
2690 registry.registerCollection("biases", CollectionType.TAGGED)
2691 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2692 # First query yields two results, and involves no postprocessing.
2693 query1 = registry.queryDataIds(["physical_filter"], band="r")
2694 self.assertTrue(query1.any(execute=False, exact=False))
2695 self.assertTrue(query1.any(execute=True, exact=False))
2696 self.assertTrue(query1.any(execute=True, exact=True))
2697 self.assertEqual(query1.count(exact=False), 2)
2698 self.assertEqual(query1.count(exact=True), 2)
2699 self.assertFalse(list(query1.explain_no_results()))
2700 # Second query should yield no results, which we should see when
2701 # we attempt to expand the data ID.
2702 query2 = registry.queryDataIds(["physical_filter"], band="h")
2703 # There's no execute=False, exact=False test here because the behavior
2704 # is not something we want to guarantee in this case (and exact=False
2705 # says either answer is legal).
2706 self.assertFalse(query2.any(execute=True, exact=False))
2707 self.assertFalse(query2.any(execute=True, exact=True))
2708 self.assertEqual(query2.count(exact=False), 0)
2709 self.assertEqual(query2.count(exact=True), 0)
2710 self.assertTrue(list(query2.explain_no_results()))
2711 # These queries yield no results due to various problems that can be
2712 # spotted prior to execution, yielding helpful diagnostics.
2713 base_query = registry.queryDataIds(["detector", "physical_filter"])
2714 queries_and_snippets = [
2715 (
2716 # Dataset type name doesn't match any existing dataset types.
2717 registry.queryDatasets("nonexistent", collections=...),
2718 ["nonexistent"],
2719 ),
2720 (
2721 # Dataset type object isn't registered.
2722 registry.queryDatasets(
2723 DatasetType(
2724 "nonexistent",
2725 dimensions=["instrument"],
2726 universe=registry.dimensions,
2727 storageClass="Image",
2728 ),
2729 collections=...,
2730 ),
2731 ["nonexistent"],
2732 ),
2733 (
2734 # No datasets of this type in this collection.
2735 registry.queryDatasets("flat", collections=["biases"]),
2736 ["flat", "biases"],
2737 ),
2738 (
2739 # No datasets of this type in this collection.
2740 base_query.findDatasets("flat", collections=["biases"]),
2741 ["flat", "biases"],
2742 ),
2743 (
2744 # No collections matching at all.
2745 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2746 ["potato"],
2747 ),
2748 ]
2749 # The behavior of these additional queries is slated to change in the
2750 # future, so we also check for deprecation warnings.
2751 with self.assertWarns(FutureWarning):
2752 queries_and_snippets.append(
2753 (
2754 # Dataset type name doesn't match any existing dataset
2755 # types.
2756 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2757 ["nonexistent"],
2758 )
2759 )
2760 with self.assertWarns(FutureWarning):
2761 queries_and_snippets.append(
2762 (
2763 # Dataset type name doesn't match any existing dataset
2764 # types.
2765 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2766 ["nonexistent"],
2767 )
2768 )
2769 for query, snippets in queries_and_snippets:
2770 self.assertFalse(query.any(execute=False, exact=False))
2771 self.assertFalse(query.any(execute=True, exact=False))
2772 self.assertFalse(query.any(execute=True, exact=True))
2773 self.assertEqual(query.count(exact=False), 0)
2774 self.assertEqual(query.count(exact=True), 0)
2775 messages = list(query.explain_no_results())
2776 self.assertTrue(messages)
2777 # Want all expected snippets to appear in at least one message.
2778 self.assertTrue(
2779 any(
2780 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2781 ),
2782 messages,
2783 )
2785 # This query does yield results, but should also emit a warning because
2786 # passing dataset type patterns to queryDataIds is deprecated; just look for
2787 # the warning.
2788 with self.assertWarns(FutureWarning):
2789 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2791 # These queries yield no results due to problems that can be identified
2792 # by cheap follow-up queries, yielding helpful diagnostics.
2793 for query, snippets in [
2794 (
2795 # No records for one of the involved dimensions.
2796 registry.queryDataIds(["subfilter"]),
2797 ["no rows", "subfilter"],
2798 ),
2799 (
2800 # No records for one of the involved dimensions.
2801 registry.queryDimensionRecords("subfilter"),
2802 ["no rows", "subfilter"],
2803 ),
2804 ]:
2805 self.assertFalse(query.any(execute=True, exact=False))
2806 self.assertFalse(query.any(execute=True, exact=True))
2807 self.assertEqual(query.count(exact=True), 0)
2808 messages = list(query.explain_no_results())
2809 self.assertTrue(messages)
2810 # Want all expected snippets to appear in at least one message.
2811 self.assertTrue(
2812 any(
2813 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2814 ),
2815 messages,
2816 )
2818 # This query yields four overlaps in the database, but one is filtered
2819 # out in postprocessing. The count queries aren't accurate because
2820 # they don't account for duplication that happens due to an internal
2821 # join against commonSkyPix.
2822 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2823 self.assertEqual(
2824 {
2825 DataCoordinate.standardize(
2826 instrument="Cam1",
2827 skymap="SkyMap1",
2828 visit=v,
2829 tract=t,
2830 universe=registry.dimensions,
2831 )
2832 for v, t in [(1, 0), (2, 0), (2, 1)]
2833 },
2834 set(query3),
2835 )
2836 self.assertTrue(query3.any(execute=False, exact=False))
2837 self.assertTrue(query3.any(execute=True, exact=False))
2838 self.assertTrue(query3.any(execute=True, exact=True))
2839 self.assertGreaterEqual(query3.count(exact=False), 4)
2840 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2841 self.assertFalse(list(query3.explain_no_results()))
2842 # This query yields overlaps in the database, but all are filtered
2843 # out in postprocessing. The count queries again aren't very useful.
2844 # We have to use `where=` here to avoid an optimization that
2845 # (currently) skips the spatial postprocess-filtering because it
2846 # recognizes that no spatial join is necessary. That's not ideal, but
2847 # fixing it is out of scope for this ticket.
2848 query4 = registry.queryDataIds(
2849 ["visit", "tract"],
2850 instrument="Cam1",
2851 skymap="SkyMap1",
2852 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2853 )
2854 self.assertFalse(set(query4))
2855 self.assertTrue(query4.any(execute=False, exact=False))
2856 self.assertTrue(query4.any(execute=True, exact=False))
2857 self.assertFalse(query4.any(execute=True, exact=True))
2858 self.assertGreaterEqual(query4.count(exact=False), 1)
2859 self.assertEqual(query4.count(exact=True, discard=True), 0)
2860 messages = query4.explain_no_results()
2861 self.assertTrue(messages)
2862 self.assertTrue(any("overlap" in message for message in messages))
2863 # This query should yield results from one dataset type but not the
2864 # other, which is not registered.
2865 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2866 self.assertTrue(set(query5))
2867 self.assertTrue(query5.any(execute=False, exact=False))
2868 self.assertTrue(query5.any(execute=True, exact=False))
2869 self.assertTrue(query5.any(execute=True, exact=True))
2870 self.assertGreaterEqual(query5.count(exact=False), 1)
2871 self.assertGreaterEqual(query5.count(exact=True), 1)
2872 self.assertFalse(list(query5.explain_no_results()))
2873 # This query applies a selection that yields no results, fully in the
2874 # database. Explaining why it fails involves traversing the relation
2875 # tree and running a LIMIT 1 query at each level that has the potential
2876 # to remove rows.
2877 query6 = registry.queryDimensionRecords(
2878 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2879 )
2880 self.assertEqual(query6.count(exact=True), 0)
2881 messages = query6.explain_no_results()
2882 self.assertTrue(messages)
2883 self.assertTrue(any("no-purpose" in message for message in messages))
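A sketch of the intended calling pattern for these summary methods (hedged; `registry` assumed).

results = registry.queryDataIds(["physical_filter"], band="h")
# exact=False permits a fast, conservative answer; exact=True may run
# the full query. count(exact=False) is likewise only an upper bound.
if not results.any(execute=True, exact=False):
    for message in results.explain_no_results():
        print(message)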
2885 def testQueryDataIdsExpressionError(self):
2886 """Test error checking of 'where' expressions in queryDataIds."""
2887 registry = self.makeRegistry()
2888 self.loadData(registry, "base.yaml")
2889 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2890 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2891 registry.queryDataIds(["detector"], where="foo.bar = 12")
2892 with self.assertRaisesRegex(
2893 LookupError, "Dimension element name cannot be inferred in this context."
2894 ):
2895 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2897 def testQueryDataIdsOrderBy(self):
2898 """Test order_by and limit on result returned by queryDataIds()."""
2899 registry = self.makeRegistry()
2900 self.loadData(registry, "base.yaml")
2901 self.loadData(registry, "datasets.yaml")
2902 self.loadData(registry, "spatial.yaml")
2904 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2905 return registry.queryDataIds(
2906 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2907 )
2909 Test = namedtuple(
2910 "testQueryDataIdsOrderByTest",
2911 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2912 defaults=(None, None, None),
2913 )
2915 test_data = (
2916 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2917 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2918 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2919 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2920 Test(
2921 "tract.id,visit.id",
2922 "tract,visit",
2923 ((0, 1), (0, 1), (0, 2)),
2924 limit=(3,),
2925 ),
2926 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2927 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2928 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2929 Test(
2930 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2931 ),
2932 Test(
2933 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2934 ),
2935 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2936 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2937 Test(
2938 "tract,-timespan.begin,timespan.end",
2939 "tract,visit",
2940 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2941 ),
2942 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2943 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2944 Test(
2945 "tract,detector",
2946 "tract,detector",
2947 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2948 datasets="flat",
2949 collections="imported_r",
2950 ),
2951 Test(
2952 "tract,detector.full_name",
2953 "tract,detector",
2954 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2955 datasets="flat",
2956 collections="imported_r",
2957 ),
2958 Test(
2959 "tract,detector.raft,detector.name_in_raft",
2960 "tract,detector",
2961 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2962 datasets="flat",
2963 collections="imported_r",
2964 ),
2965 )
2967 for test in test_data:
2968 order_by = test.order_by.split(",")
2969 keys = test.keys.split(",")
2970 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2971 if test.limit is not None:
2972 query = query.limit(*test.limit)
2973 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2974 self.assertEqual(dataIds, test.result)
2976 # and materialize
2977 query = do_query(keys).order_by(*order_by)
2978 if test.limit is not None:
2979 query = query.limit(*test.limit)
2980 with self.assertRaises(RelationalAlgebraError):
2981 with query.materialize():
2982 pass
2984 # errors in a name
2985 for order_by in ("", "-"):
2986 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2987 list(do_query().order_by(order_by))
2989 for order_by in ("undimension.name", "-undimension.name"):
2990 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2991 list(do_query().order_by(order_by))
2993 for order_by in ("attract", "-attract"):
2994 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2995 list(do_query().order_by(order_by))
2997 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2998 list(do_query(("exposure", "visit")).order_by("exposure_time"))
3000 with self.assertRaisesRegex(
3001 ValueError,
3002 r"Timespan exists in more than one dimension element \(exposure, visit\); "
3003 r"qualify timespan with specific dimension name\.",
3004 ):
3005 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3007 with self.assertRaisesRegex(
3008 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3009 ):
3010 list(do_query("tract").order_by("timespan.begin"))
3012 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3013 list(do_query("tract").order_by("tract.timespan.begin"))
3015 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3016 list(do_query("tract").order_by("tract.name"))
3018 with self.assertRaisesRegex(
3019 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3020 ):
3021 list(do_query("visit").order_by("timestamp.begin"))
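A sketch of sorting and paging (hedged; `registry` assumed).

# Keys may be dimension names, qualified metadata fields, or timespan
# bounds; a leading '-' reverses the sort; limit(limit, offset) pages.
query = (
    registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
    .order_by("tract", "-timespan.begin")
    .limit(3, 3)
)
for data_id in query:
    print(data_id["tract"], data_id["visit"])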
3023 def testQueryDataIdsGovernorExceptions(self):
3024 """Test exceptions raised by queryDataIds() for incorrect governors."""
3025 registry = self.makeRegistry()
3026 self.loadData(registry, "base.yaml")
3027 self.loadData(registry, "datasets.yaml")
3028 self.loadData(registry, "spatial.yaml")
3030 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3031 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3033 Test = namedtuple(
3034 "testQueryDataIdExceptionsTest",
3035 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3036 defaults=(None, None, None, {}, None, 0),
3037 )
3039 test_data = (
3040 Test("tract,visit", count=6),
3041 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3042 Test(
3043 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3044 ),
3045 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3046 Test(
3047 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3048 ),
3049 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3050 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3051 Test(
3052 "tract,visit",
3053 where="instrument=cam AND skymap=map",
3054 bind={"cam": "Cam1", "map": "SkyMap1"},
3055 count=6,
3056 ),
3057 Test(
3058 "tract,visit",
3059 where="instrument=cam AND skymap=map",
3060 bind={"cam": "Cam", "map": "SkyMap"},
3061 exception=DataIdValueError,
3062 ),
3063 )
3065 for test in test_data:
3066 dimensions = test.dimensions.split(",")
3067 if test.exception:
3068 with self.assertRaises(test.exception):
3069 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3070 else:
3071 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3072 self.assertEqual(query.count(discard=True), test.count)
3074 # and materialize
3075 if test.exception:
3076 with self.assertRaises(test.exception):
3077 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3078 with query.materialize() as materialized:
3079 materialized.count(discard=True)
3080 else:
3081 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3082 with query.materialize() as materialized:
3083 self.assertEqual(materialized.count(discard=True), test.count)
3085 def testQueryDimensionRecordsOrderBy(self):
3086 """Test order_by and limit on result returned by
3087 queryDimensionRecords().
3088 """
3089 registry = self.makeRegistry()
3090 self.loadData(registry, "base.yaml")
3091 self.loadData(registry, "datasets.yaml")
3092 self.loadData(registry, "spatial.yaml")
3094 def do_query(element, datasets=None, collections=None):
3095 return registry.queryDimensionRecords(
3096 element, instrument="Cam1", datasets=datasets, collections=collections
3097 )
3099 query = do_query("detector")
3100 self.assertEqual(len(list(query)), 4)
3102 Test = namedtuple(
3103 "testQueryDataIdsOrderByTest",
3104 ("element", "order_by", "result", "limit", "datasets", "collections"),
3105 defaults=(None, None, None),
3106 )
3108 test_data = (
3109 Test("detector", "detector", (1, 2, 3, 4)),
3110 Test("detector", "-detector", (4, 3, 2, 1)),
3111 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3112 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3113 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3114 Test("visit", "visit", (1, 2)),
3115 Test("visit", "-visit.id", (2, 1)),
3116 Test("visit", "zenith_angle", (1, 2)),
3117 Test("visit", "-visit.name", (2, 1)),
3118 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3119 )
3121 for test in test_data:
3122 order_by = test.order_by.split(",")
3123 query = do_query(test.element).order_by(*order_by)
3124 if test.limit is not None:
3125 query = query.limit(*test.limit)
3126 dataIds = tuple(rec.id for rec in query)
3127 self.assertEqual(dataIds, test.result)
3129 # errors in a name
3130 for order_by in ("", "-"):
3131 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3132 list(do_query("detector").order_by(order_by))
3134 for order_by in ("undimension.name", "-undimension.name"):
3135 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3136 list(do_query("detector").order_by(order_by))
3138 for order_by in ("attract", "-attract"):
3139 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3140 list(do_query("detector").order_by(order_by))
3142 for order_by in ("timestamp.begin", "-timestamp.begin"):
3143 with self.assertRaisesRegex(
3144 ValueError,
3145 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3146 r"perhaps you meant 'timespan.begin'\?",
3147 ):
3148 list(do_query("visit").order_by(order_by))
3150 def testQueryDimensionRecordsExceptions(self):
3151 """Test exceptions raised by queryDimensionRecords()."""
3152 registry = self.makeRegistry()
3153 self.loadData(registry, "base.yaml")
3154 self.loadData(registry, "datasets.yaml")
3155 self.loadData(registry, "spatial.yaml")
3157 result = registry.queryDimensionRecords("detector")
3158 self.assertEqual(result.count(), 4)
3159 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3160 self.assertEqual(result.count(), 4)
3161 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3162 self.assertEqual(result.count(), 4)
3163 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3164 self.assertEqual(result.count(), 4)
3165 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3166 self.assertEqual(result.count(), 4)
3168 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3169 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3170 result.count()
3172 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3173 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3174 result.count()
3176 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3177 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3178 result.count()
3180 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3181 result = registry.queryDimensionRecords(
3182 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3183 )
3184 result.count()
3186 def testDatasetConstrainedDimensionRecordQueries(self):
3187 """Test that queryDimensionRecords works even when given a dataset
3188 constraint whose dimensions extend beyond the requested dimension
3189 element's.
3190 """
3191 registry = self.makeRegistry()
3192 self.loadData(registry, "base.yaml")
3193 self.loadData(registry, "datasets.yaml")
3194 # Query for physical_filter dimension records, using a dataset that
3195 # has both physical_filter and detector dimensions.
3196 records = registry.queryDimensionRecords(
3197 "physical_filter",
3198 datasets=["flat"],
3199 collections="imported_r",
3200 )
3201 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3202 # Trying to constrain by all dataset types is an error.
3203 with self.assertRaises(TypeError):
3204 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3206 def testSkyPixDatasetQueries(self):
3207 """Test that we can build queries involving skypix dimensions as long
3208 as a dataset type that uses those dimensions is included.
3209 """
3210 registry = self.makeRegistry()
3211 self.loadData(registry, "base.yaml")
3212 dataset_type = DatasetType(
3213 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3214 )
3215 registry.registerDatasetType(dataset_type)
3216 run = "r"
3217 registry.registerRun(run)
3218 # First try queries where there are no datasets; the concern is whether
3219 # we can even build and execute these queries without raising, even
3220 # when "doomed" query shortcuts are in play.
3221 self.assertFalse(
3222 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3223 )
3224 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3225 # Now add a dataset and see that we can get it back.
3226 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3227 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3228 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3229 self.assertEqual(
3230 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3231 {data_id},
3232 )
3233 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
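A condensed sketch of the skypix plumbing used above (hedged; `registry`, `dataset_type`, and `run` as in the test).

# Each skypix dimension is backed by an lsst.sphgeom pixelization;
# universe() yields the valid (begin, end) pixel-index ranges.
pixelization = registry.dimensions.skypix["htm"][7].pixelization
first_pixel = pixelization.universe()[0][0]
data_id = registry.expandDataId(instrument="Cam1", htm7=first_pixel)
(ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)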
3235 def testDatasetIdFactory(self):
3236 """Simple test for DatasetIdFactory, mostly to catch potential changes
3237 in its API.
3238 """
3239 registry = self.makeRegistry()
3240 factory = DatasetIdFactory()
3241 dataset_type = DatasetType(
3242 "datasetType",
3243 dimensions=["detector", "instrument"],
3244 universe=registry.dimensions,
3245 storageClass="int",
3246 )
3247 run = "run"
3248 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
3250 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3251 self.assertIsInstance(datasetId, uuid.UUID)
3252 self.assertEqual(datasetId.version, 4)
3254 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3255 self.assertIsInstance(datasetId, uuid.UUID)
3256 self.assertEqual(datasetId.version, 5)
3258 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3259 self.assertIsInstance(datasetId, uuid.UUID)
3260 self.assertEqual(datasetId.version, 5)
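A sketch of the generation modes (hedged: the v4/v5 split is exactly what the assertions above check; the deterministic modes presumably hash the run, dataset type, and data ID).

factory = DatasetIdFactory()
# UNIQUE: a random UUID4, different on every call.
unique_id = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
# DATAID_TYPE_RUN: a deterministic UUID5, stable for the same inputs.
stable_id = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)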
3262 def testExposureQueries(self):
3263 """Test query methods using arguments sourced from the exposure log
3264 service.
3266 The most complete test dataset currently available to daf_butler tests
3267 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3268 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3269 dimension records, as it was focused on providing nontrivial spatial
3270 overlaps between visit+detector and tract+patch. So in this test we
3271 need to translate queries that originally used the exposure dimension
3272 to use the (very similar) visit dimension instead.
3273 """
3274 registry = self.makeRegistry()
3275 self.loadData(registry, "hsc-rc2-subset.yaml")
3276 self.assertEqual(
3277 [
3278 record.id
3279 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3280 .order_by("id")
3281 .limit(5)
3282 ],
3283 [318, 322, 326, 330, 332],
3284 )
3285 self.assertEqual(
3286 [
3287 data_id["visit"]
3288 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3289 ],
3290 [318, 322, 326, 330, 332],
3291 )
3292 self.assertEqual(
3293 [
3294 record.id
3295 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3296 .order_by("full_name")
3297 .limit(5)
3298 ],
3299 [73, 72, 71, 70, 65],
3300 )
3301 self.assertEqual(
3302 [
3303 data_id["detector"]
3304 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3305 .order_by("full_name")
3306 .limit(5)
3307 ],
3308 [73, 72, 71, 70, 65],
3309 )
3311 def test_long_query_names(self) -> None:
3312 """Test that queries involving very long names are handled correctly.
3314 This is especially important for PostgreSQL, which truncates identifiers
3315 longer than 63 characters, but it's worth testing for all DBs.
3316 """
3317 registry = self.makeRegistry()
3318 name = "abcd" * 17
3319 registry.registerDatasetType(
3320 DatasetType(
3321 name,
3322 dimensions=(),
3323 storageClass="Exposure",
3324 universe=registry.dimensions,
3325 )
3326 )
3327 # We need to search more than one collection that actually contains a
3328 # matching dataset; otherwise an optimization makes findFirst=True a
3329 # no-op, which would sidestep any bugs caused by truncation.
3330 run1 = "run1"
3331 registry.registerRun(run1)
3332 run2 = "run2"
3333 registry.registerRun(run2)
3334 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1)
3335 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2)
3336 self.assertEqual(
3337 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3338 {ref1},
3339 )
3341 def test_skypix_constraint_queries(self) -> None:
3342 """Test queries spatially constrained by a skypix data ID."""
3343 registry = self.makeRegistry()
3344 self.loadData(registry, "hsc-rc2-subset.yaml")
3345 patch_regions = {
3346 (data_id["tract"], data_id["patch"]): data_id.region
3347 for data_id in registry.queryDataIds(["patch"]).expanded()
3348 }
3349 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3350 # This check ensures the test doesn't become trivial due to a config
3351 # change; if it does, just pick a different HTM level.
3352 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3353 # Gather all skypix IDs that definitely overlap at least one of these
3354 # patches.
3355 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3356 for patch_region in patch_regions.values():
3357 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3358 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3359 # and does not overlap at least one other patch.
3360 for skypix_id in itertools.chain.from_iterable(
3361 range(begin, end) for begin, end in relevant_skypix_ids
3362 ):
3363 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3364 overlapping_patches = {
3365 patch_key
3366 for patch_key, patch_region in patch_regions.items()
3367 if not patch_region.isDisjointFrom(skypix_region)
3368 }
3369 if overlapping_patches and overlapping_patches != patch_regions.keys():
3370 break
3371 else:
3372 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3373 self.assertEqual(
3374 {
3375 (data_id["tract"], data_id["patch"])
3376 for data_id in registry.queryDataIds(
3377 ["patch"],
3378 dataId={skypix_dimension.name: skypix_id},
3379 )
3380 },
3381 overlapping_patches,
3382 )
3383 # Test that a three-way join that includes the common skypix system in
3384 # the dimensions doesn't generate redundant join terms in the query.
3385 full_data_ids = set(
3386 registry.queryDataIds(
3387 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3388 ).expanded()
3389 )
3390 self.assertGreater(len(full_data_ids), 0)
3391 for data_id in full_data_ids:
3392 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3393 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
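A minimal sketch of the constraint form tested above (hedged; `registry` and `skypix_id` assumed).

# A skypix value in the data ID spatially constrains the patch query;
# the overlap join against the patch regions is implicit.
patches = registry.queryDataIds(["patch"], dataId={"htm11": skypix_id})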
3395 def test_spatial_constraint_queries(self) -> None:
3396 """Test queries in which one spatial dimension in the constraint (data
3397 ID or ``where`` string) constrains a different spatial dimension in the
3398 query result columns.
3399 """
3400 registry = self.makeRegistry()
3401 self.loadData(registry, "hsc-rc2-subset.yaml")
3402 patch_regions = {
3403 (data_id["tract"], data_id["patch"]): data_id.region
3404 for data_id in registry.queryDataIds(["patch"]).expanded()
3405 }
3406 observation_regions = {
3407 (data_id["visit"], data_id["detector"]): data_id.region
3408 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3409 }
3410 all_combos = {
3411 (patch_key, observation_key)
3412 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3413 }
3414 overlapping_combos = {
3415 (patch_key, observation_key)
3416 for patch_key, observation_key in all_combos
3417 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3418 }
3419 # Check a direct spatial join with no constraint first.
3420 self.assertEqual(
3421 {
3422 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3423 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3424 },
3425 overlapping_combos,
3426 )
3427 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3428 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3429 for patch_key, observation_key in overlapping_combos:
3430 overlaps_by_patch[patch_key].add(observation_key)
3431 overlaps_by_observation[observation_key].add(patch_key)
3432 # Find patches and observations that overlap at least one of the other
3433 # but not all of the other.
3434 nontrivial_patch = next(
3435 iter(
3436 patch_key
3437 for patch_key, observation_keys in overlaps_by_patch.items()
3438 if observation_keys and observation_keys != observation_regions.keys()
3439 )
3440 )
3441 nontrivial_observation = next(
3442 iter(
3443 observation_key
3444 for observation_key, patch_keys in overlaps_by_observation.items()
3445 if patch_keys and patch_keys != patch_regions.keys()
3446 )
3447 )
3448 # Use the nontrivial patches and observations as constraints on the
3449 # other dimensions in various ways, first via a 'where' expression.
3450 # It's better in general to use 'bind' instead of f-strings, but these
3451 # are all integers, so there are no quoting concerns.
3452 self.assertEqual(
3453 {
3454 (data_id["visit"], data_id["detector"])
3455 for data_id in registry.queryDataIds(
3456 ["visit", "detector"],
3457 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3458 skymap="hsc_rings_v1",
3459 )
3460 },
3461 overlaps_by_patch[nontrivial_patch],
3462 )
3463 self.assertEqual(
3464 {
3465 (data_id["tract"], data_id["patch"])
3466 for data_id in registry.queryDataIds(
3467 ["patch"],
3468 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3469 instrument="HSC",
3470 )
3471 },
3472 overlaps_by_observation[nontrivial_observation],
3473 )
3474 # and then via the dataId argument.
3475 self.assertEqual(
3476 {
3477 (data_id["visit"], data_id["detector"])
3478 for data_id in registry.queryDataIds(
3479 ["visit", "detector"],
3480 dataId={
3481 "tract": nontrivial_patch[0],
3482 "patch": nontrivial_patch[1],
3483 },
3484 skymap="hsc_rings_v1",
3485 )
3486 },
3487 overlaps_by_patch[nontrivial_patch],
3488 )
3489 self.assertEqual(
3490 {
3491 (data_id["tract"], data_id["patch"])
3492 for data_id in registry.queryDataIds(
3493 ["patch"],
3494 dataId={
3495 "visit": nontrivial_observation[0],
3496 "detector": nontrivial_observation[1],
3497 },
3498 instrument="HSC",
3499 )
3500 },
3501 overlaps_by_observation[nontrivial_observation],
3502 )
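A minimal sketch of the two constraint spellings tested above (hedged; `registry` assumed, ID values illustrative).

# tract/patch in the data ID (or 'where' string) spatially constrain
# the visit+detector results through the implicit overlap join.
observations = registry.queryDataIds(
    ["visit", "detector"], dataId={"tract": 0, "patch": 1}, skymap="hsc_rings_v1"
)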
3504 def test_query_projection_drop_postprocessing(self) -> None:
3505 """Test that projections and deduplications on query objects can
3506 drop post-query region filtering to ensure the query remains in
3507 the SQL engine.
3508 """
3509 registry = self.makeRegistry()
3510 self.loadData(registry, "base.yaml")
3511 self.loadData(registry, "spatial.yaml")
3513 def pop_transfer(tree: Relation) -> Relation:
3514 """If a relation tree terminates with a transfer to a new engine,
3515 return the relation prior to that transfer. If not, return the
3516 original relation.
3517 """
3518 match tree:
3519 case Transfer(target=target):
3520 return target
3521 case _:
3522 return tree
3524 # There's no public way to get a Query object yet, so we get one from a
3525 # DataCoordinateQueryResults private attribute. When a public API is
3526 # available this test should use it.
3527 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3528 # We expect this query to terminate in the iteration engine originally,
3529 # because region-filtering is necessary.
3530 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3531 # If we deduplicate, we usually have to do that downstream of the
3532 # filtering. That means the deduplication has to happen in the
3533 # iteration engine.
3534 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3535 # If we pass drop_postprocessing, we instead drop the region filtering
3536 # so the deduplication can happen in SQL (though there might still be
3537 # transfer to iteration at the tail of the tree that we can ignore;
3538 # that's what the pop_transfer takes care of here).
3539 self.assertIsInstance(
3540 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3541 sql.Engine,
3542 )
3544 def test_query_find_datasets_drop_postprocessing(self) -> None:
3545 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3546 problems with the FindFirstDataset relation operation.
3547 """
3548 # Setup: load some visit, tract, and patch records, and insert two
3549 # datasets with dimensions {visit, patch}, with one in each of two
3550 # RUN collections.
3551 registry = self.makeRegistry()
3552 self.loadData(registry, "base.yaml")
3553 self.loadData(registry, "spatial.yaml")
3554 storage_class = StorageClass("Warpy")
3555 registry.storageClasses.registerStorageClass(storage_class)
3556 dataset_type = DatasetType(
3557 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3558 )
3559 registry.registerDatasetType(dataset_type)
3560 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3561 registry.registerRun("run1")
3562 registry.registerRun("run2")
3563 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3564 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3565 # Query for the dataset using queryDataIds(...).findDatasets(...)
3566 # against only one of the two collections. This should work even
3567 # though the relation returned by queryDataIds ends with
3568 # iteration-engine region-filtering, because we can recognize before
3569 # running the query that there is only one collection to search and
3570 # hence the (default) findFirst=True is irrelevant, and joining in the
3571 # dataset query commutes past the iteration-engine postprocessing.
3572 query1 = registry.queryDataIds(
3573 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3574 )
3575 self.assertEqual(
3576 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3577 {ref1},
3578 )
3579 # Query for the dataset using queryDataIds(...).findDatasets(...)
3580 # against both collections. This can only work if the FindFirstDataset
3581 # operation can be commuted past the iteration-engine postprocessing into SQL.
3582 query2 = registry.queryDataIds(
3583 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3584 )
3585 self.assertEqual(
3586 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3587 {ref2},
3588 )
3590 def test_query_empty_collections(self) -> None:
3591 """Test for registry query methods with empty collections. The methods
3592 should return an empty result set (or None when applicable) and provide
3593 "doomed" diagnostics.
3594 """
3595 registry = self.makeRegistry()
3596 self.loadData(registry, "base.yaml")
3597 self.loadData(registry, "datasets.yaml")
3599 # Tests for registry.findDataset()
3600 with self.assertRaises(NoDefaultCollectionError):
3601 registry.findDataset("bias", instrument="Cam1", detector=1)
3602 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3603 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3605 # Tests for registry.queryDatasets()
3606 with self.assertRaises(NoDefaultCollectionError):
3607 registry.queryDatasets("bias")
3608 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3610 result = registry.queryDatasets("bias", collections=[])
3611 self.assertEqual(len(list(result)), 0)
3612 messages = list(result.explain_no_results())
3613 self.assertTrue(messages)
3614 self.assertTrue(any("because collection list is empty" in message for message in messages))
3616 # Tests for registry.queryDataIds()
3617 with self.assertRaises(NoDefaultCollectionError):
3618 registry.queryDataIds("detector", datasets="bias")
3619 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3621 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3622 self.assertEqual(len(list(result)), 0)
3623 messages = list(result.explain_no_results())
3624 self.assertTrue(messages)
3625 self.assertTrue(any("because collection list is empty" in message for message in messages))
3627 # Tests for registry.queryDimensionRecords()
3628 with self.assertRaises(NoDefaultCollectionError):
3629 registry.queryDimensionRecords("detector", datasets="bias")
3630 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3632 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3633 self.assertEqual(len(list(result)), 0)
3634 messages = list(result.explain_no_results())
3635 self.assertTrue(messages)
3636 self.assertTrue(any("because collection list is empty" in message for message in messages))
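A sketch of the calling pattern (hedged; `registry` assumed).

# An explicitly empty collection list is legal: the result is empty
# and "doomed", with diagnostics, rather than an error.
result = registry.queryDatasets("bias", collections=[])
assert not list(result)
for message in result.explain_no_results():
    print(message)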
3638 def test_dataset_followup_spatial_joins(self) -> None:
3639 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3640 is involved.
3641 """
3642 registry = self.makeRegistry()
3643 self.loadData(registry, "base.yaml")
3644 self.loadData(registry, "spatial.yaml")
3645 pvi_dataset_type = DatasetType(
3646 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3647 )
3648 registry.registerDatasetType(pvi_dataset_type)
3649 collection = "datasets"
3650 registry.registerRun(collection)
3651 (pvi1,) = registry.insertDatasets(
3652 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3653 )
3654 (pvi2,) = registry.insertDatasets(
3655 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3656 )
3657 (pvi3,) = registry.insertDatasets(
3658 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3659 )
3660 self.assertEqual(
3661 set(
3662 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3663 .expanded()
3664 .findRelatedDatasets("pvi", [collection])
3665 ),
3666 {
3667 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3668 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3669 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3670 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3671 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3672 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3673 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3674 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3675 },
3676 )
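A minimal sketch of the follow-up join (hedged; `registry` and `collection` as in the test).

# For each patch data ID, findRelatedDatasets yields (data ID, ref)
# pairs for every spatially related dataset in the given collections.
pairs = (
    registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
    .expanded()
    .findRelatedDatasets("pvi", [collection])
)
for patch_data_id, ref in pairs:
    print(patch_data_id["patch"], ref.id)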