Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5% (1513 statements; coverage.py v7.3.1, created at 2023-10-02 08:00 +0000)
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import datetime, timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetIdFactory,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need an entirely default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Registry | None = None) -> Registry | None:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
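            # register() first declares the entities named in the file
            # (dataset types, runs, and other collections); load() then
            # inserts dimension records and datasets.  Passing datastore=None
            # skips file-artifact transfers, which is all a pure Registry
            # test needs.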
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
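        # Sequence-valued keyword arguments become per-column IN lists that
        # are ANDed together, so only the row satisfying both constraints
        # (id=1 with name="one") is returned here.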
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first
        # contains duplicates, and the second has its matching elements land
        # in different batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
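        # id=1 belongs to the row named "one", so also requiring name="two"
        # matches nothing.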
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting the identical dataset type is allowed, but returns
        # False because nothing new was registered.
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
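        # visit 1 was defined from exposure 1 alone (see the visit_definition
        # record above), so pairing visit=1 with exposure=2 is contradictory.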
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
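        # Collection search order decides which dataset wins: with a timespan
        # supplied, whichever of imported_g (holding bias1) or Cam1/calib
        # (holding the certified bias2) appears first supplies the match.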
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test reproducible (non-random) IDs; they can be re-imported
        # multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make dataset ref with reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)
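                # Version-5 UUIDs are name-based, so regenerating one for the
                # same dataset type and data ID (plus the run, for
                # DATAID_TYPE_RUN) deterministically reproduces the value.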

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=None)).names,
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured, since
            # assertLogs fails if nothing is logged at all.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
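        # run2 is now a direct child of both chains, so it has two parents;
        # chain1's only parent is chain2.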
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1: chain2 should find it via tag1 inside
        # chain1, recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; searching chain2 should find the
        # same dataset, via run2 at the front of the chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
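        # With flatten=True the chained children are recursively expanded
        # before being stored, so "outer" records "innermost" directly
        # instead of going through "inner".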
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not of the original insertion in the
                    # outer block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
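        # Net contents: run1 holds raws for exposures 100, 101, 110, 111
        # (detectors 1-3); run2 holds 100 (detectors 1-3) plus 200 and 201
        # (detectors 3-5); tagged2 holds the run2 copy of 100, the run1 copy
        # of 101, and 200/201.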

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
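        # (This union is {instrument, exposure, detector, visit}: raw
        # contributes exposure, calexp contributes visit.)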
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in `dimensions`, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )
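        # (All three dataset types share skymap/tract/patch; band comes from
        # the two band-dependent types, so the union is {skymap, tract,
        # patch, band}.)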

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
1295 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1296 queried = set(registry.queryDataIds(graph))
1297 self.assertEqual(expected, queried)
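# (Illustrative sketch, not part of the original test: the brute-force
# expectations above treat two regions as overlapping exactly when
# sphgeom's isDisjointFrom returns False; `circle` below is a
# hypothetical example region.)
circle = lsst.sphgeom.Circle(
    lsst.sphgeom.UnitVector3d(1.0, 0.0, 0.0), lsst.sphgeom.Angle.fromDegrees(1.0)
)
self.assertFalse(circle.isDisjointFrom(circle))  # a region always overlaps itself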
1299 def testAbstractQuery(self):
1300 """Test that we can run a query that just lists the known
1301 bands. This is tricky because band is
1302 backed by a query against physical_filter.
1303 """
1304 registry = self.makeRegistry()
1305 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1306 registry.insertDimensionData(
1307 "physical_filter",
1308 dict(instrument="DummyCam", name="dummy_i", band="i"),
1309 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1310 dict(instrument="DummyCam", name="dummy_r", band="r"),
1311 )
1312 rows = registry.queryDataIds(["band"]).toSet()
1313 self.assertCountEqual(
1314 rows,
1315 [
1316 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1317 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1318 ],
1319 )
1321 def testAttributeManager(self):
1322 """Test basic functionality of attribute manager."""
1323 # Number of attributes with schema versions in a fresh database:
1324 # 6 managers with 2 records per manager, plus config for dimensions
1325 VERSION_COUNT = 6 * 2 + 1
1327 registry = self.makeRegistry()
1328 attributes = registry._managers.attributes
1330 # check what get() returns for non-existing key
1331 self.assertIsNone(attributes.get("attr"))
1332 self.assertEqual(attributes.get("attr", ""), "")
1333 self.assertEqual(attributes.get("attr", "Value"), "Value")
1334 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1336 # cannot store empty key or value
1337 with self.assertRaises(ValueError):
1338 attributes.set("", "value")
1339 with self.assertRaises(ValueError):
1340 attributes.set("attr", "")
1342 # set value of non-existing key
1343 attributes.set("attr", "value")
1344 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1345 self.assertEqual(attributes.get("attr"), "value")
1347 # update value of existing key
1348 with self.assertRaises(ButlerAttributeExistsError):
1349 attributes.set("attr", "value2")
1351 attributes.set("attr", "value2", force=True)
1352 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1353 self.assertEqual(attributes.get("attr"), "value2")
1355 # delete existing key
1356 self.assertTrue(attributes.delete("attr"))
1357 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1359 # delete non-existing key
1360 self.assertFalse(attributes.delete("non-attr"))
1362 # store a bunch of keys and get the list back
1363 data = [
1364 ("version.core", "1.2.3"),
1365 ("version.dimensions", "3.2.1"),
1366 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1367 ]
1368 for key, value in data:
1369 attributes.set(key, value)
1370 items = dict(attributes.items())
1371 for key, value in data:
1372 self.assertEqual(items[key], value)
1374 def testQueryDatasetsDeduplication(self):
1375 """Test that the findFirst option to queryDatasets selects datasets
1376 from collections in the order given.
1377 """
1378 registry = self.makeRegistry()
1379 self.loadData(registry, "base.yaml")
1380 self.loadData(registry, "datasets.yaml")
1381 self.assertCountEqual(
1382 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1383 [
1384 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1385 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1386 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1387 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1388 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1389 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1390 ],
1391 )
1392 self.assertCountEqual(
1393 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1394 [
1395 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1396 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1397 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1398 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1399 ],
1400 )
1401 self.assertCountEqual(
1402 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1403 [
1404 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1405 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1406 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1407 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1408 ],
1409 )
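# (Illustrative sketch, not part of the original test: with a single-run
# search path there is nothing for findFirst to shadow, so both modes
# return the same datasets.)
self.assertCountEqual(
    list(registry.queryDatasets("bias", collections=["imported_g"], findFirst=True)),
    list(registry.queryDatasets("bias", collections=["imported_g"], findFirst=False)),
)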
1411 def testQueryResults(self):
1412 """Test querying for data IDs and then manipulating the QueryResults
1413 object returned to perform other queries.
1414 """
1415 registry = self.makeRegistry()
1416 self.loadData(registry, "base.yaml")
1417 self.loadData(registry, "datasets.yaml")
1418 bias = registry.getDatasetType("bias")
1419 flat = registry.getDatasetType("flat")
1420 # Obtain expected results from methods other than those we're testing
1421 # here. That includes:
1422 # - the dimensions of the data IDs we want to query:
1423 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1424 # - the dimensions of some other data IDs we'll extract from that:
1425 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1426 # - the data IDs we expect to obtain from the first queries:
1427 expectedDataIds = DataCoordinateSet(
1428 {
1429 DataCoordinate.standardize(
1430 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1431 )
1432 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1433 },
1434 graph=expectedGraph,
1435 hasFull=False,
1436 hasRecords=False,
1437 )
1438 # - the flat datasets we expect to find from those data IDs, in just
1439 # one collection (so deduplication is irrelevant):
1440 expectedFlats = [
1441 registry.findDataset(
1442 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1443 ),
1444 registry.findDataset(
1445 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1446 ),
1447 registry.findDataset(
1448 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1449 ),
1450 ]
1451 # - the data IDs we expect to extract from that:
1452 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1453 # - the bias datasets we expect to find from those data IDs, after we
1454 # subset out the physical_filter dimension, both with duplicates:
1455 expectedAllBiases = [
1456 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1457 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1458 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1459 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1460 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1461 ]
1462 # - ...and without duplicates:
1463 expectedDeduplicatedBiases = [
1464 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1465 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1466 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1467 ]
1468 # Test against those expected results, using a "lazy" query for the
1469 # data IDs (which re-executes that query each time we use it to do
1470 # something new).
1471 dataIds = registry.queryDataIds(
1472 ["detector", "physical_filter"],
1473 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1474 instrument="Cam1",
1475 )
1476 self.assertEqual(dataIds.graph, expectedGraph)
1477 self.assertEqual(dataIds.toSet(), expectedDataIds)
1478 self.assertCountEqual(
1479 list(
1480 dataIds.findDatasets(
1481 flat,
1482 collections=["imported_r"],
1483 )
1484 ),
1485 expectedFlats,
1486 )
1487 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1488 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1489 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1490 self.assertCountEqual(
1491 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1492 expectedAllBiases,
1493 )
1494 self.assertCountEqual(
1495 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1496 expectedDeduplicatedBiases,
1497 )
1499 # Searching for a dataset with dimensions we had projected away
1500 # restores those dimensions.
1501 self.assertCountEqual(
1502 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1503 expectedFlats,
1504 )
1506 # Use a component dataset type.
1507 self.assertCountEqual(
1508 [
1509 ref.makeComponentRef("image")
1510 for ref in subsetDataIds.findDatasets(
1511 bias,
1512 collections=["imported_r", "imported_g"],
1513 findFirst=False,
1514 )
1515 ],
1516 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1517 )
1519 # Use a named dataset type that does not exist and a dataset type
1520 # object that does not exist.
1521 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1523 # Test both string name and dataset type object.
1524 test_type: str | DatasetType
1525 for test_type, test_type_name in (
1526 (unknown_type, unknown_type.name),
1527 (unknown_type.name, unknown_type.name),
1528 ):
1529 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1530 list(
1531 subsetDataIds.findDatasets(
1532 test_type, collections=["imported_r", "imported_g"], findFirst=True
1533 )
1534 )
1536 # Materialize the bias dataset queries (only) by putting the results
1537 # into temporary tables, then repeat those tests.
1538 with subsetDataIds.findDatasets(
1539 bias, collections=["imported_r", "imported_g"], findFirst=False
1540 ).materialize() as biases:
1541 self.assertCountEqual(list(biases), expectedAllBiases)
1542 with subsetDataIds.findDatasets(
1543 bias, collections=["imported_r", "imported_g"], findFirst=True
1544 ).materialize() as biases:
1545 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1546 # Materialize the data ID subset query, but not the dataset queries.
1547 with subsetDataIds.materialize() as subsetDataIds:
1548 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1549 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1550 self.assertCountEqual(
1551 list(
1552 subsetDataIds.findDatasets(
1553 bias, collections=["imported_r", "imported_g"], findFirst=False
1554 )
1555 ),
1556 expectedAllBiases,
1557 )
1558 self.assertCountEqual(
1559 list(
1560 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1561 ),
1562 expectedDeduplicatedBiases,
1563 )
1564 # Materialize the dataset queries, too.
1565 with subsetDataIds.findDatasets(
1566 bias, collections=["imported_r", "imported_g"], findFirst=False
1567 ).materialize() as biases:
1568 self.assertCountEqual(list(biases), expectedAllBiases)
1569 with subsetDataIds.findDatasets(
1570 bias, collections=["imported_r", "imported_g"], findFirst=True
1571 ).materialize() as biases:
1572 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1573 # Materialize the original query, but none of the follow-up queries.
1574 with dataIds.materialize() as dataIds:
1575 self.assertEqual(dataIds.graph, expectedGraph)
1576 self.assertEqual(dataIds.toSet(), expectedDataIds)
1577 self.assertCountEqual(
1578 list(
1579 dataIds.findDatasets(
1580 flat,
1581 collections=["imported_r"],
1582 )
1583 ),
1584 expectedFlats,
1585 )
1586 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1587 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1588 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1589 self.assertCountEqual(
1590 list(
1591 subsetDataIds.findDatasets(
1592 bias, collections=["imported_r", "imported_g"], findFirst=False
1593 )
1594 ),
1595 expectedAllBiases,
1596 )
1597 self.assertCountEqual(
1598 list(
1599 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1600 ),
1601 expectedDeduplicatedBiases,
1602 )
1603 # Materialize just the bias dataset queries.
1604 with subsetDataIds.findDatasets(
1605 bias, collections=["imported_r", "imported_g"], findFirst=False
1606 ).materialize() as biases:
1607 self.assertCountEqual(list(biases), expectedAllBiases)
1608 with subsetDataIds.findDatasets(
1609 bias, collections=["imported_r", "imported_g"], findFirst=True
1610 ).materialize() as biases:
1611 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1612 # Materialize the subset data ID query, but not the dataset
1613 # queries.
1614 with subsetDataIds.materialize() as subsetDataIds:
1615 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1616 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1617 self.assertCountEqual(
1618 list(
1619 subsetDataIds.findDatasets(
1620 bias, collections=["imported_r", "imported_g"], findFirst=False
1621 )
1622 ),
1623 expectedAllBiases,
1624 )
1625 self.assertCountEqual(
1626 list(
1627 subsetDataIds.findDatasets(
1628 bias, collections=["imported_r", "imported_g"], findFirst=True
1629 )
1630 ),
1631 expectedDeduplicatedBiases,
1632 )
1633 # Materialize the bias dataset queries, too, so now we're
1634 # materializing every single step.
1635 with subsetDataIds.findDatasets(
1636 bias, collections=["imported_r", "imported_g"], findFirst=False
1637 ).materialize() as biases:
1638 self.assertCountEqual(list(biases), expectedAllBiases)
1639 with subsetDataIds.findDatasets(
1640 bias, collections=["imported_r", "imported_g"], findFirst=True
1641 ).materialize() as biases:
1642 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
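# (Illustrative note, not part of the original test: materialize() is used
# as a context manager above because the results live in a temporary table
# that is dropped when the block exits, so materialized results must not
# be used outside their `with` block.)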
1644 def testStorageClassPropagation(self):
1645 """Test that queries for datasets respect the storage class passed in
1646 as part of a full dataset type.
1647 """
1648 registry = self.makeRegistry()
1649 self.loadData(registry, "base.yaml")
1650 dataset_type_in_registry = DatasetType(
1651 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1652 )
1653 registry.registerDatasetType(dataset_type_in_registry)
1654 run = "run1"
1655 registry.registerRun(run)
1656 (inserted_ref,) = registry.insertDatasets(
1657 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1658 )
1659 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1660 query_dataset_type = DatasetType(
1661 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1662 )
1663 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1664 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1665 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1666 (query_datasets_ref,) = query_datasets_result
1667 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1668 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1669 query_dataset_type, collections=[run]
1670 )
1671 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1672 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1673 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1674 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1675 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1676 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1677 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
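# (Illustrative sketch, not part of the original test, assuming
# DatasetType.overrideStorageClass swaps only the storage class: the query
# dataset type above is just the registered type with a compatible storage
# class substituted.)
self.assertEqual(
    dataset_type_in_registry.overrideStorageClass("StructuredDataDict"), query_dataset_type
)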
1679 def testEmptyDimensionsQueries(self):
1680 """Test Query and QueryResults objects in the case where there are no
1681 dimensions.
1682 """
1683 # Set up test data: one dataset type, two runs, one dataset in each.
1684 registry = self.makeRegistry()
1685 self.loadData(registry, "base.yaml")
1686 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1687 registry.registerDatasetType(schema)
1688 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1689 run1 = "run1"
1690 run2 = "run2"
1691 registry.registerRun(run1)
1692 registry.registerRun(run2)
1693 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1694 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1695 # Query directly for both of the datasets, and then for each one at a time.
1696 self.checkQueryResults(
1697 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1698 )
1699 self.checkQueryResults(
1700 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1701 [dataset1],
1702 )
1703 self.checkQueryResults(
1704 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1705 [dataset2],
1706 )
1707 # Query for data IDs with no dimensions.
1708 dataIds = registry.queryDataIds([])
1709 self.checkQueryResults(dataIds, [dataId])
1710 # Use queried data IDs to find the datasets.
1711 self.checkQueryResults(
1712 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1713 [dataset1, dataset2],
1714 )
1715 self.checkQueryResults(
1716 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1717 [dataset1],
1718 )
1719 self.checkQueryResults(
1720 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1721 [dataset2],
1722 )
1723 # Now materialize the data ID query results and repeat those tests.
1724 with dataIds.materialize() as dataIds:
1725 self.checkQueryResults(dataIds, [dataId])
1726 self.checkQueryResults(
1727 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1728 [dataset1],
1729 )
1730 self.checkQueryResults(
1731 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1732 [dataset2],
1733 )
1734 # Query for non-empty data IDs, then subset that to get the empty one.
1735 # Repeat the above tests starting from that.
1736 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1737 self.checkQueryResults(dataIds, [dataId])
1738 self.checkQueryResults(
1739 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1740 [dataset1, dataset2],
1741 )
1742 self.checkQueryResults(
1743 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1744 [dataset1],
1745 )
1746 self.checkQueryResults(
1747 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1748 [dataset2],
1749 )
1750 with dataIds.materialize() as dataIds:
1751 self.checkQueryResults(dataIds, [dataId])
1752 self.checkQueryResults(
1753 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1754 [dataset1, dataset2],
1755 )
1756 self.checkQueryResults(
1757 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1758 [dataset1],
1759 )
1760 self.checkQueryResults(
1761 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1762 [dataset2],
1763 )
1764 # Query for non-empty data IDs, then materialize, then subset to get
1765 # the empty one. Repeat again.
1766 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1767 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1768 self.checkQueryResults(dataIds, [dataId])
1769 self.checkQueryResults(
1770 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1771 [dataset1, dataset2],
1772 )
1773 self.checkQueryResults(
1774 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1775 [dataset1],
1776 )
1777 self.checkQueryResults(
1778 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1779 [dataset2],
1780 )
1781 with dataIds.materialize() as dataIds:
1782 self.checkQueryResults(dataIds, [dataId])
1783 self.checkQueryResults(
1784 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1785 [dataset1, dataset2],
1786 )
1787 self.checkQueryResults(
1788 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1789 [dataset1],
1790 )
1791 self.checkQueryResults(
1792 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1793 [dataset2],
1794 )
1795 # Query for non-empty data IDs with a constraint on an empty-data-ID
1796 # dataset that exists.
1797 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1798 self.checkQueryResults(
1799 dataIds.subset(unique=True),
1800 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1801 )
1802 # Again query for non-empty data IDs with a constraint on empty-data-ID
1803 # datasets, but when the datasets don't exist. We delete the existing
1804 # dataset and query just that collection rather than creating a new
1805 # empty collection because this is a bit less likely for our build-time
1806 # logic to shortcut out (via the collection summaries), and such a
1807 # shortcut would make this test a bit more trivial than we'd like.
1808 registry.removeDatasets([dataset2])
1809 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1810 self.checkQueryResults(dataIds, [])
1812 def testDimensionDataModifications(self):
1813 """Test that modifying dimension records via:
1814 syncDimensionData(..., update=True) and
1815 insertDimensionData(..., replace=True) works as expected, even in the
1816 presence of datasets using those dimensions and spatial overlap
1817 relationships.
1818 """
1820 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1821 """Unpack a sphgeom.RangeSet into the integers it contains."""
1822 for begin, end in ranges:
1823 yield from range(begin, end)
1825 def range_set_hull(
1826 ranges: lsst.sphgeom.RangeSet,
1827 pixelization: lsst.sphgeom.HtmPixelization,
1828 ) -> lsst.sphgeom.ConvexPolygon:
1829 """Create a ConvexPolygon hull of the region defined by a set of
1830 HTM pixelization index ranges.
1831 """
1832 points = []
1833 for index in unpack_range_set(ranges):
1834 points.extend(pixelization.triangle(index).getVertices())
1835 return lsst.sphgeom.ConvexPolygon(points)
1837 # Use HTM to set up an initial parent region (one arbitrary trixel) and four
1838 # child regions (the trixels within the parent at the next level; subdividing
1839 # trixel i yields children 4*i..4*i + 3, hence RangeSet.scaled(4) below). We'll
1840 # use the parent as a tract/visit region and the children as its patch/visit_detector regions.
1841 registry = self.makeRegistry()
1842 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1843 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1844 index = 12288
1845 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1846 assert htm6.universe().contains(child_ranges_small)
1847 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1848 parent_region_small = lsst.sphgeom.ConvexPolygon(
1849 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1850 )
1851 assert all(parent_region_small.contains(c) for c in child_regions_small)
1852 # Make a larger version of each child region, defined to be the set of
1853 # htm6 trixels that overlap the original's bounding circle. Make a new
1854 # parent that's the convex hull of the new children.
1855 child_regions_large = [
1856 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1857 ]
1858 assert all(
1859 large.contains(small)
1860 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1861 )
1862 parent_region_large = lsst.sphgeom.ConvexPolygon(
1863 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1864 )
1865 assert all(parent_region_large.contains(c) for c in child_regions_large)
1866 assert parent_region_large.contains(parent_region_small)
1867 assert not parent_region_small.contains(parent_region_large)
1868 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1869 # Find some commonSkyPix indices that overlap the large regions but do
1870 # not overlap the small regions. We use commonSkyPix here to make sure the
1871 # real tests later involve what's in the database, not just post-query
1872 # filtering of regions.
1873 child_difference_indices = []
1874 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1875 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1876 assert difference, "if this is empty, we can't test anything useful with these regions"
1877 assert all(
1878 not commonSkyPix.triangle(d).isDisjointFrom(large)
1879 and commonSkyPix.triangle(d).isDisjointFrom(small)
1880 for d in difference
1881 )
1882 child_difference_indices.append(difference)
1883 parent_difference_indices = list(
1884 unpack_range_set(
1885 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1886 )
1887 )
1888 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1889 assert all(
1890 (
1891 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1892 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1893 )
1894 for d in parent_difference_indices
1895 )
1896 # Now that we've finally got those regions, we'll insert the large ones
1897 # as tract/patch dimension records.
1898 skymap_name = "testing_v1"
1899 registry.insertDimensionData(
1900 "skymap",
1901 {
1902 "name": skymap_name,
1903 "hash": bytes([42]),
1904 "tract_max": 1,
1905 "patch_nx_max": 2,
1906 "patch_ny_max": 2,
1907 },
1908 )
1909 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1910 registry.insertDimensionData(
1911 "patch",
1912 *[
1913 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1914 for n, c in enumerate(child_regions_large)
1915 ],
1916 )
1917 # Add a dataset that uses these dimensions to make sure that modifying
1918 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1919 # implement insert with replace=True as delete-then-insert).
1920 dataset_type = DatasetType(
1921 "coadd",
1922 dimensions=["tract", "patch"],
1923 universe=registry.dimensions,
1924 storageClass="Exposure",
1925 )
1926 registry.registerDatasetType(dataset_type)
1927 registry.registerCollection("the_run", CollectionType.RUN)
1928 registry.insertDatasets(
1929 dataset_type,
1930 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1931 run="the_run",
1932 )
1933 # Query for tracts and patches that overlap some "difference" commonSkyPix
1934 # pixels; there should be overlaps, because the database has
1935 # the "large" suite of regions.
1936 self.assertEqual(
1937 {0},
1938 {
1939 data_id["tract"]
1940 for data_id in registry.queryDataIds(
1941 ["tract"],
1942 skymap=skymap_name,
1943 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1944 )
1945 },
1946 )
1947 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1948 self.assertIn(
1949 patch_id,
1950 {
1951 data_id["patch"]
1952 for data_id in registry.queryDataIds(
1953 ["patch"],
1954 skymap=skymap_name,
1955 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1956 )
1957 },
1958 )
1959 # Use sync to update the tract region and insert to update the regions
1960 # of the patches, to the "small" suite.
1961 updated = registry.syncDimensionData(
1962 "tract",
1963 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1964 update=True,
1965 )
1966 self.assertEqual(updated, {"region": parent_region_large})
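# (Illustrative note, not part of the original test: with update=True,
# syncDimensionData returns a dict mapping each updated field to its
# previous value, which is why the old "large" region appears here.)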
1967 registry.insertDimensionData(
1968 "patch",
1969 *[
1970 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1971 for n, c in enumerate(child_regions_small)
1972 ],
1973 replace=True,
1974 )
1975 # Query again; there should now be no such overlaps, because the
1976 # database has the "small" suite of regions.
1977 self.assertFalse(
1978 set(
1979 registry.queryDataIds(
1980 ["tract"],
1981 skymap=skymap_name,
1982 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1983 )
1984 )
1985 )
1986 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1987 self.assertNotIn(
1988 patch_id,
1989 {
1990 data_id["patch"]
1991 for data_id in registry.queryDataIds(
1992 ["patch"],
1993 skymap=skymap_name,
1994 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1995 )
1996 },
1997 )
1998 # Update back to the large regions and query one more time.
1999 updated = registry.syncDimensionData(
2000 "tract",
2001 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2002 update=True,
2003 )
2004 self.assertEqual(updated, {"region": parent_region_small})
2005 registry.insertDimensionData(
2006 "patch",
2007 *[
2008 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2009 for n, c in enumerate(child_regions_large)
2010 ],
2011 replace=True,
2012 )
2013 self.assertEqual(
2014 {0},
2015 {
2016 data_id["tract"]
2017 for data_id in registry.queryDataIds(
2018 ["tract"],
2019 skymap=skymap_name,
2020 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2021 )
2022 },
2023 )
2024 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2025 self.assertIn(
2026 patch_id,
2027 {
2028 data_id["patch"]
2029 for data_id in registry.queryDataIds(
2030 ["patch"],
2031 skymap=skymap_name,
2032 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2033 )
2034 },
2035 )
2037 def testCalibrationCollections(self):
2038 """Test operations on `~CollectionType.CALIBRATION` collections,
2039 including `Registry.certify`, `Registry.decertify`,
2040 `Registry.findDataset`, and
2041 `DataCoordinateQueryResults.findRelatedDatasets`.
2042 """
2043 # Setup - make a Registry, fill it with some datasets in
2044 # non-calibration collections.
2045 registry = self.makeRegistry()
2046 self.loadData(registry, "base.yaml")
2047 self.loadData(registry, "datasets.yaml")
2048 # Set up some timestamps.
2049 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2050 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2051 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2052 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2053 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2054 allTimespans = [
2055 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2056 ]
2057 # Insert some exposure records with timespans spanning each sequential
2058 # pair of those times.
2059 registry.insertDimensionData(
2060 "exposure",
2061 {
2062 "instrument": "Cam1",
2063 "id": 0,
2064 "obs_id": "zero",
2065 "physical_filter": "Cam1-G",
2066 "timespan": Timespan(t1, t2),
2067 },
2068 {
2069 "instrument": "Cam1",
2070 "id": 1,
2071 "obs_id": "one",
2072 "physical_filter": "Cam1-G",
2073 "timespan": Timespan(t2, t3),
2074 },
2075 {
2076 "instrument": "Cam1",
2077 "id": 2,
2078 "obs_id": "two",
2079 "physical_filter": "Cam1-G",
2080 "timespan": Timespan(t3, t4),
2081 },
2082 {
2083 "instrument": "Cam1",
2084 "id": 3,
2085 "obs_id": "three",
2086 "physical_filter": "Cam1-G",
2087 "timespan": Timespan(t4, t5),
2088 },
2089 )
2090 # Get references to some datasets.
2091 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2092 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2093 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2094 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2095 # Register the main calibration collection we'll be working with.
2096 collection = "Cam1/calibs/default"
2097 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2098 # Cannot associate into a calibration collection (no timespan).
2099 with self.assertRaises(CollectionTypeError):
2100 registry.associate(collection, [bias2a])
2101 # Certify 2a dataset with [t2, t4) validity.
2102 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2103 # Test that we can query for this dataset via the new collection, both
2104 # on its own and with a RUN collection.
2105 self.assertEqual(
2106 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2107 {bias2a},
2108 )
2109 self.assertEqual(
2110 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2111 {
2112 bias2a,
2113 bias2b,
2114 bias3b,
2115 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2116 },
2117 )
2118 self.assertEqual(
2119 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2120 {registry.expandDataId(instrument="Cam1", detector=2)},
2121 )
2122 self.assertEqual(
2123 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2124 {
2125 registry.expandDataId(instrument="Cam1", detector=2),
2126 registry.expandDataId(instrument="Cam1", detector=3),
2127 registry.expandDataId(instrument="Cam1", detector=4),
2128 },
2129 )
2130 self.assertEqual(
2131 set(
2132 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2133 "bias", findFirst=True, collections=[collection]
2134 )
2135 ),
2136 {
2137 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2138 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2139 },
2140 )
2141 self.assertEqual(
2142 set(
2143 registry.queryDataIds(
2144 ["exposure", "detector"], instrument="Cam1", detector=2
2145 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2146 ),
2147 {
2148 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2149 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2150 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2151 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2152 },
2153 )
2155 # We should not be able to certify 2b with anything overlapping that
2156 # window.
2157 with self.assertRaises(ConflictingDefinitionError):
2158 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2159 with self.assertRaises(ConflictingDefinitionError):
2160 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2161 with self.assertRaises(ConflictingDefinitionError):
2162 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2163 with self.assertRaises(ConflictingDefinitionError):
2164 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2165 with self.assertRaises(ConflictingDefinitionError):
2166 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2167 with self.assertRaises(ConflictingDefinitionError):
2168 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2169 with self.assertRaises(ConflictingDefinitionError):
2170 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2171 with self.assertRaises(ConflictingDefinitionError):
2172 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2173 # We should be able to certify 3a with a range overlapping that window,
2174 # because it's for a different detector.
2175 # We'll certify 3a over [t1, t3).
2176 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2177 # Now we'll certify 2b and 3b together over [t4, ∞).
2178 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2180 # Fetch all associations and check that they are what we expect.
2181 self.assertCountEqual(
2182 list(
2183 registry.queryDatasetAssociations(
2184 "bias",
2185 collections=[collection, "imported_g", "imported_r"],
2186 )
2187 ),
2188 [
2189 DatasetAssociation(
2190 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2191 collection="imported_g",
2192 timespan=None,
2193 ),
2194 DatasetAssociation(
2195 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2196 collection="imported_r",
2197 timespan=None,
2198 ),
2199 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2200 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2201 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2202 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2203 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2204 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2205 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2206 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2207 ],
2208 )
2210 class Ambiguous:
2211 """Tag class to denote lookups that should be ambiguous."""
2213 pass
2215 def assertLookup(
2216 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2217 ) -> None:
2218 """Local function that asserts that a bias lookup returns the given
2219 expected result.
2220 """
2221 if expected is Ambiguous:
2222 with self.assertRaises((DatasetTypeError, LookupError)):
2223 registry.findDataset(
2224 "bias",
2225 collections=collection,
2226 instrument="Cam1",
2227 detector=detector,
2228 timespan=timespan,
2229 )
2230 else:
2231 self.assertEqual(
2232 expected,
2233 registry.findDataset(
2234 "bias",
2235 collections=collection,
2236 instrument="Cam1",
2237 detector=detector,
2238 timespan=timespan,
2239 ),
2240 )
2242 # Systematically test lookups against expected results.
2243 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2244 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2245 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2246 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2247 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2248 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2249 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2250 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2251 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2252 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2253 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2254 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2255 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2256 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2257 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2258 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2259 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2260 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2261 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2262 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2263 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2264 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2265 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2266 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2267 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2268 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2269 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2270 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2271 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2272 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2273 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2274 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2275 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2276 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2277 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2278 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2279 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2280 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2281 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2282 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2283 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2284 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
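# (Illustrative note, not part of the original test: a lookup is Ambiguous
# exactly when the query timespan overlaps more than one certified validity
# range for the same data ID, e.g. Timespan(None, t5) for detector 2
# overlaps both bias2a's [t2, t4) and bias2b's [t4, ∞).)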
2286 # Test lookups via temporal joins to exposures.
2287 self.assertEqual(
2288 set(
2289 registry.queryDataIds(
2290 ["exposure", "detector"], instrument="Cam1", detector=2
2291 ).findRelatedDatasets("bias", collections=[collection])
2292 ),
2293 {
2294 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2295 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2296 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2297 },
2298 )
2299 self.assertEqual(
2300 set(
2301 registry.queryDataIds(
2302 ["exposure", "detector"], instrument="Cam1", detector=3
2303 ).findRelatedDatasets("bias", collections=[collection])
2304 ),
2305 {
2306 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2307 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2308 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2309 },
2310 )
2311 self.assertEqual(
2312 set(
2313 registry.queryDataIds(
2314 ["exposure", "detector"], instrument="Cam1", detector=2
2315 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2316 ),
2317 {
2318 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2319 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2320 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2321 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2322 },
2323 )
2324 self.assertEqual(
2325 set(
2326 registry.queryDataIds(
2327 ["exposure", "detector"], instrument="Cam1", detector=3
2328 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2329 ),
2330 {
2331 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2332 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2333 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2334 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2335 },
2336 )
2338 # Decertify [t3, t5) for all data IDs, and run the test lookups again.
2339 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2340 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2341 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2342 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2343 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2344 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2345 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2346 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2347 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2348 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2349 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2350 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2351 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2352 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2353 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2354 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2355 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2356 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2357 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2358 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2359 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2360 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2361 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2362 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2363 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2364 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2365 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2366 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2367 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2368 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2369 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2370 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2371 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2372 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2373 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2374 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2375 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2376 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2377 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2378 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2379 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2380 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2381 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2382 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2383 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2385 # Decertify everything, this time with explicit data IDs, then check
2386 # that no lookups succeed.
2387 registry.decertify(
2388 collection,
2389 "bias",
2390 Timespan(None, None),
2391 dataIds=[
2392 dict(instrument="Cam1", detector=2),
2393 dict(instrument="Cam1", detector=3),
2394 ],
2395 )
2396 for detector in (2, 3):
2397 for timespan in allTimespans:
2398 assertLookup(detector=detector, timespan=timespan, expected=None)
2399 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2400 # those.
2401 registry.certify(
2402 collection,
2403 [bias2a, bias3a],
2404 Timespan(None, None),
2405 )
2406 for timespan in allTimespans:
2407 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2408 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2409 # Decertify just bias2a over [t2, t4).
2410 # This should split a single certification row into two (and leave the
2411 # other existing row, for bias3a, alone).
2412 registry.decertify(
2413 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2414 )
2415 for timespan in allTimespans:
2416 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2417 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2418 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2419 if overlapsBefore and overlapsAfter:
2420 expected = Ambiguous
2421 elif overlapsBefore or overlapsAfter:
2422 expected = bias2a
2423 else:
2424 expected = None
2425 assertLookup(detector=2, timespan=timespan, expected=expected)
2427 def testSkipCalibs(self):
2428 """Test how queries handle skipping of calibration collections."""
2429 registry = self.makeRegistry()
2430 self.loadData(registry, "base.yaml")
2431 self.loadData(registry, "datasets.yaml")
2433 coll_calib = "Cam1/calibs/default"
2434 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2436 # Add all biases to the calibration collection.
2437 # Without this, the logic that prunes dataset subqueries based on
2438 # datasetType-collection summary information will fire before the logic
2439 # we want to test below. This is a good thing (it avoids the dreaded
2440 # NotImplementedError a bit more often) everywhere but here.
2441 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2443 coll_list = [coll_calib, "imported_g", "imported_r"]
2444 chain = "Cam1/chain"
2445 registry.registerCollection(chain, type=CollectionType.CHAINED)
2446 registry.setCollectionChain(chain, coll_list)
2448 # explicit list will raise if findFirst=True or there are temporal
2449 # dimensions
2450 with self.assertRaises(NotImplementedError):
2451 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2452 with self.assertRaises(NotImplementedError):
2453 registry.queryDataIds(
2454 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2455 ).count()
2457 # chain will skip
2458 datasets = list(registry.queryDatasets("bias", collections=chain))
2459 self.assertGreater(len(datasets), 0)
2461 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2462 self.assertGreater(len(dataIds), 0)
2464 # glob will skip too
2465 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2466 self.assertGreater(len(datasets), 0)
2468 # regular expression will skip too
2469 pattern = re.compile(".*")
2470 datasets = list(registry.queryDatasets("bias", collections=pattern))
2471 self.assertGreater(len(datasets), 0)
2473 # ellipsis should work as usual
2474 datasets = list(registry.queryDatasets("bias", collections=...))
2475 self.assertGreater(len(datasets), 0)
2477 # a few tests with findFirst
2478 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2479 self.assertGreater(len(datasets), 0)
2481 def testIngestTimeQuery(self):
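"""Test that `where` expressions in queryDatasets can filter on the
dataset ingest_date field.
"""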
2482 registry = self.makeRegistry()
2483 self.loadData(registry, "base.yaml")
2484 dt0 = datetime.utcnow()
2485 self.loadData(registry, "datasets.yaml")
2486 dt1 = datetime.utcnow()
2488 datasets = list(registry.queryDatasets(..., collections=...))
2489 len0 = len(datasets)
2490 self.assertGreater(len0, 0)
2492 where = "ingest_date > T'2000-01-01'"
2493 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2494 len1 = len(datasets)
2495 self.assertEqual(len0, len1)
2497 # no one will ever use this piece of software in 30 years
2498 where = "ingest_date > T'2050-01-01'"
2499 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2500 len2 = len(datasets)
2501 self.assertEqual(len2, 0)
2503 # Check more exact timing to make sure there is no 37-second offset
2504 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2505 # sure that we don't test with higher precision.
2506 tests = [
2507 # format: (timestamp, operator, expected_len)
2508 (dt0 - timedelta(seconds=1), ">", len0),
2509 (dt0 - timedelta(seconds=1), "<", 0),
2510 (dt1 + timedelta(seconds=1), "<", len0),
2511 (dt1 + timedelta(seconds=1), ">", 0),
2512 ]
2513 for dt, op, expect_len in tests:
2514 dt_str = dt.isoformat(sep=" ")
2516 where = f"ingest_date {op} T'{dt_str}'"
2517 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2518 self.assertEqual(len(datasets), expect_len)
2520 # same with bind using datetime or astropy Time
2521 where = f"ingest_date {op} ingest_time"
2522 datasets = list(
2523 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2524 )
2525 self.assertEqual(len(datasets), expect_len)
2527 dt_astropy = astropy.time.Time(dt, format="datetime")
2528 datasets = list(
2529 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2530 )
2531 self.assertEqual(len(datasets), expect_len)
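# (Illustrative note, not part of the original test: in the query
# expression language used above, T'...' denotes a time literal parsed
# from an ISO string, while bind values may be supplied as datetime or
# astropy.time.Time objects interchangeably.)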
2533 def testTimespanQueries(self):
2534 """Test query expressions involving timespans."""
2535 registry = self.makeRegistry()
2536 self.loadData(registry, "hsc-rc2-subset.yaml")
2537 # All visits in the database; mapping from ID to timespan.
2538 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2539 # Just those IDs, sorted (which is also temporal sorting, because HSC
2540 # visit IDs are monotonically increasing).
2541 ids = sorted(visits.keys())
2542 self.assertGreater(len(ids), 20)
2543 # Pick some quasi-random indexes into `ids` to play with.
2544 i1 = int(len(ids) * 0.1)
2545 i2 = int(len(ids) * 0.3)
2546 i3 = int(len(ids) * 0.6)
2547 i4 = int(len(ids) * 0.8)
2548 # Extract some times from those: just before the beginning of i1 (which
2549 # should be after the end of the previous visit), exactly the
2550 # beginning of i2, just after the beginning of i3 (and before its end),
2551 # and the exact end of i4.
2552 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2553 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2554 t2 = visits[ids[i2]].begin
2555 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2556 self.assertLess(t3, visits[ids[i3]].end)
2557 t4 = visits[ids[i4]].end
2558 # Make sure those are actually in order.
2559 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2561 bind = {
2562 "t1": t1,
2563 "t2": t2,
2564 "t3": t3,
2565 "t4": t4,
2566 "ts23": Timespan(t2, t3),
2567 }
2569 def query(where):
2570 """Return results as a sorted, deduplicated list of visit IDs."""
2571 return sorted(
2572 {
2573 dataId["visit"]
2574 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2575 }
2576 )
2578 # Try a bunch of timespan queries, mixing up the bounds themselves,
2579 # where they appear in the expression, and how we get the timespan into
2580 # the expression.
2582 # t1 is before the start of i1, so this should not include i1.
2583 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2584 # t2 is exactly at the start of i2, but ends are exclusive, so these
2585 # should not include i2.
2586 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2587 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2588 # t3 is in the middle of i3, so this should include i3.
2589 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2590 # This one should not include i3, by the same reasoning.
2591 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2592 # t4 is exactly at the end of i4, so this should include i4.
2593 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2594 # i4's upper bound of t4 is exclusive, so this should not include i4.
2595 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2597 # Now some timespan vs. time scalar queries.
2598 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2599 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2600 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2601 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2602 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2603 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2605 # Empty timespans should not overlap anything.
2606 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
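# A minimal sketch (illustrative only, not exercised by the test) of the
# half-open semantics the assertions above rely on, assuming only the
# Timespan class imported at the top of this module: begins are inclusive
# and ends are exclusive, so timespans that merely touch do not overlap.
#
# t0 = astropy.time.Time("2020-01-01T00:00:00", scale="tai")
# t1 = t0 + astropy.time.TimeDelta(30.0, format="sec")
# t2 = t1 + astropy.time.TimeDelta(30.0, format="sec")
# assert not Timespan(t0, t1).overlaps(Timespan(t1, t2))
# assert Timespan(t0, t1).overlaps(Timespan(t0, t2))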
2608 def testCollectionSummaries(self):
2609 """Test recording and retrieval of collection summaries."""
2610 self.maxDiff = None
2611 registry = self.makeRegistry()
2612 # Importing datasets from yaml should go through the code path where
2613 # we update collection summaries as we insert datasets.
2614 self.loadData(registry, "base.yaml")
2615 self.loadData(registry, "datasets.yaml")
2616 flat = registry.getDatasetType("flat")
2617 expected1 = CollectionSummary()
2618 expected1.dataset_types.add(registry.getDatasetType("bias"))
2619 expected1.add_data_ids(
2620 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2621 )
2622 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2623 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2624 # Create a chained collection with both of the imported runs; the
2625 # summary should be the same, because it's a union with itself.
2626 chain = "chain"
2627 registry.registerCollection(chain, CollectionType.CHAINED)
2628 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2629 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2630 # Associate flats only into a tagged collection and a calibration
2631 # collection to check summaries of those.
2632 tag = "tag"
2633 registry.registerCollection(tag, CollectionType.TAGGED)
2634 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2635 calibs = "calibs"
2636 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2637 registry.certify(
2638 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2639 )
2640 expected2 = expected1.copy()
2641 expected2.dataset_types.discard("bias")
2642 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2643 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2644 # Explicitly calling Registry.refresh() should load those same
2645 # summaries, via a totally different code path.
2646 registry.refresh()
2647 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2648 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2649 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2650 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
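# For context: a CollectionSummary records which dataset types and
# governor data ID values appear in a collection, so a CHAINED
# collection's summary is the union of its children's; with identical
# children that union is unchanged, which is why the chain compares
# equal to expected1 above.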
2652 def testBindInQueryDatasets(self):
2653 """Test that the bind parameter is correctly forwarded in
2654 queryDatasets recursion.
2655 """
2656 registry = self.makeRegistry()
2657 # Load some datasets to query.
2659 self.loadData(registry, "base.yaml")
2660 self.loadData(registry, "datasets.yaml")
2661 self.assertEqual(
2662 set(registry.queryDatasets("flat", band="r", collections=...)),
2663 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2664 )
2666 def testQueryIntRangeExpressions(self):
2667 """Test integer range expressions in ``where`` arguments.
2669 Note that our expressions use inclusive stop values, unlike Python's.
2670 """
2671 registry = self.makeRegistry()
2672 self.loadData(registry, "base.yaml")
2673 self.assertEqual(
2674 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2675 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2676 )
2677 self.assertEqual(
2678 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2679 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2680 )
2681 self.assertEqual(
2682 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2683 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2684 )
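# A worked reading of the range syntax exercised above: stop values are
# inclusive, so "1..2" selects {1, 2}, "1..4:2" behaves like Python's
# range(1, 4 + 1, 2) and selects {1, 3}, and "2..4:2" behaves like
# range(2, 4 + 1, 2) and selects {2, 4}.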
2686 def testQueryResultSummaries(self):
2687 """Test summary methods like `count`, `any`, and `explain_no_results`
2688 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2689 """
2690 registry = self.makeRegistry()
2691 self.loadData(registry, "base.yaml")
2692 self.loadData(registry, "datasets.yaml")
2693 self.loadData(registry, "spatial.yaml")
2694 # Default test dataset has two collections, each with both flats and
2695 # biases. Add a new collection with only biases.
2696 registry.registerCollection("biases", CollectionType.TAGGED)
2697 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2698 # First query yields two results, and involves no postprocessing.
2699 query1 = registry.queryDataIds(["physical_filter"], band="r")
2700 self.assertTrue(query1.any(execute=False, exact=False))
2701 self.assertTrue(query1.any(execute=True, exact=False))
2702 self.assertTrue(query1.any(execute=True, exact=True))
2703 self.assertEqual(query1.count(exact=False), 2)
2704 self.assertEqual(query1.count(exact=True), 2)
2705 self.assertFalse(list(query1.explain_no_results()))
2706 # Second query should yield no results, which we should see when
2707 # we attempt to expand the data ID.
2708 query2 = registry.queryDataIds(["physical_filter"], band="h")
2709 # There's no execute=False, exact=False test here because the behavior
2710 # is not something we want to guarantee in this case (and exact=False
2711 # says either answer is legal).
2712 self.assertFalse(query2.any(execute=True, exact=False))
2713 self.assertFalse(query2.any(execute=True, exact=True))
2714 self.assertEqual(query2.count(exact=False), 0)
2715 self.assertEqual(query2.count(exact=True), 0)
2716 self.assertTrue(list(query2.explain_no_results()))
2717 # These queries yield no results due to various problems that can be
2718 # spotted prior to execution, yielding helpful diagnostics.
2719 base_query = registry.queryDataIds(["detector", "physical_filter"])
2720 queries_and_snippets = [
2721 (
2722 # Dataset type name doesn't match any existing dataset types.
2723 registry.queryDatasets("nonexistent", collections=...),
2724 ["nonexistent"],
2725 ),
2726 (
2727 # Dataset type object isn't registered.
2728 registry.queryDatasets(
2729 DatasetType(
2730 "nonexistent",
2731 dimensions=["instrument"],
2732 universe=registry.dimensions,
2733 storageClass="Image",
2734 ),
2735 collections=...,
2736 ),
2737 ["nonexistent"],
2738 ),
2739 (
2740 # No datasets of this type in this collection.
2741 registry.queryDatasets("flat", collections=["biases"]),
2742 ["flat", "biases"],
2743 ),
2744 (
2745 # No datasets of this type in this collection.
2746 base_query.findDatasets("flat", collections=["biases"]),
2747 ["flat", "biases"],
2748 ),
2749 (
2750 # No collections matching at all.
2751 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2752 ["potato"],
2753 ),
2754 ]
2755 # The behavior of these additional queries is slated to change in the
2756 # future, so we also check for deprecation warnings.
2757 with self.assertWarns(FutureWarning):
2758 queries_and_snippets.append(
2759 (
2760 # Dataset type name doesn't match any existing dataset
2761 # types.
2762 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2763 ["nonexistent"],
2764 )
2765 )
2766 with self.assertWarns(FutureWarning):
2767 queries_and_snippets.append(
2768 (
2769 # Dataset type name doesn't match any existing dataset
2770 # types.
2771 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2772 ["nonexistent"],
2773 )
2774 )
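# A reading of the flag combinations exercised below (inferred from the
# comments and assertions in this test, not a documented contract):
# any(execute=False, exact=False) may answer from query structure alone
# without touching the database; execute=True allows a cheap (e.g.
# LIMIT 1) query; exact=True additionally accounts for postprocessing,
# so the answer is guaranteed. Similarly, count(exact=False) may
# overcount rows that postprocessing would discard.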
2775 for query, snippets in queries_and_snippets:
2776 self.assertFalse(query.any(execute=False, exact=False))
2777 self.assertFalse(query.any(execute=True, exact=False))
2778 self.assertFalse(query.any(execute=True, exact=True))
2779 self.assertEqual(query.count(exact=False), 0)
2780 self.assertEqual(query.count(exact=True), 0)
2781 messages = list(query.explain_no_results())
2782 self.assertTrue(messages)
2783 # Want all expected snippets to appear in at least one message.
2784 self.assertTrue(
2785 any(
2786 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2787 ),
2788 messages,
2789 )
2791 # This query does yield results, but should also emit a warning because
2792 # passing dataset type patterns to queryDataIds is deprecated; just look
2793 # for the warning.
2794 with self.assertWarns(FutureWarning):
2795 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2797 # These queries yield no results due to problems that can be identified
2798 # by cheap follow-up queries, yielding helpful diagnostics.
2799 for query, snippets in [
2800 (
2801 # No records for one of the involved dimensions.
2802 registry.queryDataIds(["subfilter"]),
2803 ["no rows", "subfilter"],
2804 ),
2805 (
2806 # No records for one of the involved dimensions.
2807 registry.queryDimensionRecords("subfilter"),
2808 ["no rows", "subfilter"],
2809 ),
2810 ]:
2811 self.assertFalse(query.any(execute=True, exact=False))
2812 self.assertFalse(query.any(execute=True, exact=True))
2813 self.assertEqual(query.count(exact=True), 0)
2814 messages = list(query.explain_no_results())
2815 self.assertTrue(messages)
2816 # Want all expected snippets to appear in at least one message.
2817 self.assertTrue(
2818 any(
2819 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2820 ),
2821 messages,
2822 )
2824 # This query yields four overlaps in the database, but one is filtered
2825 # out in postprocessing. The count queries aren't accurate because
2826 # they don't account for duplication that happens due to an internal
2827 # join against commonSkyPix.
2828 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2829 self.assertEqual(
2830 {
2831 DataCoordinate.standardize(
2832 instrument="Cam1",
2833 skymap="SkyMap1",
2834 visit=v,
2835 tract=t,
2836 universe=registry.dimensions,
2837 )
2838 for v, t in [(1, 0), (2, 0), (2, 1)]
2839 },
2840 set(query3),
2841 )
2842 self.assertTrue(query3.any(execute=False, exact=False))
2843 self.assertTrue(query3.any(execute=True, exact=False))
2844 self.assertTrue(query3.any(execute=True, exact=True))
2845 self.assertGreaterEqual(query3.count(exact=False), 4)
2846 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2847 self.assertFalse(list(query3.explain_no_results()))
2848 # This query yields overlaps in the database, but all are filtered
2849 # out in postprocessing. The count queries again aren't very useful.
2850 # We have to use `where=` here to avoid an optimization that
2851 # (currently) skips the spatial postprocess-filtering because it
2852 # recognizes that no spatial join is necessary. That's not ideal, but
2853 # fixing it is out of scope for this ticket.
2854 query4 = registry.queryDataIds(
2855 ["visit", "tract"],
2856 instrument="Cam1",
2857 skymap="SkyMap1",
2858 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2859 )
2860 self.assertFalse(set(query4))
2861 self.assertTrue(query4.any(execute=False, exact=False))
2862 self.assertTrue(query4.any(execute=True, exact=False))
2863 self.assertFalse(query4.any(execute=True, exact=True))
2864 self.assertGreaterEqual(query4.count(exact=False), 1)
2865 self.assertEqual(query4.count(exact=True, discard=True), 0)
2866 messages = query4.explain_no_results()
2867 self.assertTrue(messages)
2868 self.assertTrue(any("overlap" in message for message in messages))
2869 # This query should yield results from one dataset type but not the
2870 # other, which is not registered.
2871 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2872 self.assertTrue(set(query5))
2873 self.assertTrue(query5.any(execute=False, exact=False))
2874 self.assertTrue(query5.any(execute=True, exact=False))
2875 self.assertTrue(query5.any(execute=True, exact=True))
2876 self.assertGreaterEqual(query5.count(exact=False), 1)
2877 self.assertGreaterEqual(query5.count(exact=True), 1)
2878 self.assertFalse(list(query5.explain_no_results()))
2879 # This query applies a selection that yields no results, fully in the
2880 # database. Explaining why it fails involves traversing the relation
2881 # tree and running a LIMIT 1 query at each level that has the potential
2882 # to remove rows.
2883 query6 = registry.queryDimensionRecords(
2884 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2885 )
2886 self.assertEqual(query6.count(exact=True), 0)
2887 messages = query6.explain_no_results()
2888 self.assertTrue(messages)
2889 self.assertTrue(any("no-purpose" in message for message in messages))
2891 def testQueryDataIdsExpressionError(self):
2892 """Test error checking of 'where' expressions in queryDataIds."""
2893 registry = self.makeRegistry()
2894 self.loadData(registry, "base.yaml")
2895 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2896 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2897 registry.queryDataIds(["detector"], where="foo.bar = 12")
2898 with self.assertRaisesRegex(
2899 LookupError, "Dimension element name cannot be inferred in this context."
2900 ):
2901 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2903 def testQueryDataIdsOrderBy(self):
2904 """Test order_by and limit on result returned by queryDataIds()."""
2905 registry = self.makeRegistry()
2906 self.loadData(registry, "base.yaml")
2907 self.loadData(registry, "datasets.yaml")
2908 self.loadData(registry, "spatial.yaml")
2910 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2911 return registry.queryDataIds(
2912 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2913 )
2915 Test = namedtuple(
2916 "testQueryDataIdsOrderByTest",
2917 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2918 defaults=(None, None, None),
2919 )
2921 test_data = (
2922 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2923 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2924 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2925 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2926 Test(
2927 "tract.id,visit.id",
2928 "tract,visit",
2929 ((0, 1), (0, 1), (0, 2)),
2930 limit=(3,),
2931 ),
2932 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2933 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2934 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2935 Test(
2936 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2937 ),
2938 Test(
2939 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2940 ),
2941 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2942 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2943 Test(
2944 "tract,-timespan.begin,timespan.end",
2945 "tract,visit",
2946 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2947 ),
2948 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2949 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2950 Test(
2951 "tract,detector",
2952 "tract,detector",
2953 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2954 datasets="flat",
2955 collections="imported_r",
2956 ),
2957 Test(
2958 "tract,detector.full_name",
2959 "tract,detector",
2960 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2961 datasets="flat",
2962 collections="imported_r",
2963 ),
2964 Test(
2965 "tract,detector.raft,detector.name_in_raft",
2966 "tract,detector",
2967 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2968 datasets="flat",
2969 collections="imported_r",
2970 ),
2971 )
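# In the Test tuples above, limit is either (limit,) or (limit, offset):
# for example, with "tract,visit" ordering the full result is six rows,
# and limit=(3, 3) skips the first three rows and returns at most the
# next three, yielding ((0, 2), (1, 2), (1, 2)).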
2973 for test in test_data:
2974 order_by = test.order_by.split(",")
2975 keys = test.keys.split(",")
2976 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2977 if test.limit is not None:
2978 query = query.limit(*test.limit)
2979 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2980 self.assertEqual(dataIds, test.result)
2982 # Materializing a query with order_by/limit applied should raise.
2983 query = do_query(keys).order_by(*order_by)
2984 if test.limit is not None:
2985 query = query.limit(*test.limit)
2986 with self.assertRaises(RelationalAlgebraError):
2987 with query.materialize():
2988 pass
2990 # errors in a name
2991 for order_by in ("", "-"):
2992 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2993 list(do_query().order_by(order_by))
2995 for order_by in ("undimension.name", "-undimension.name"):
2996 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2997 list(do_query().order_by(order_by))
2999 for order_by in ("attract", "-attract"):
3000 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
3001 list(do_query().order_by(order_by))
3003 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
3004 list(do_query(("exposure", "visit")).order_by("exposure_time"))
3006 with self.assertRaisesRegex(
3007 ValueError,
3008 r"Timespan exists in more than one dimension element \(exposure, visit\); "
3009 r"qualify timespan with specific dimension name\.",
3010 ):
3011 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
3013 with self.assertRaisesRegex(
3014 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
3015 ):
3016 list(do_query("tract").order_by("timespan.begin"))
3018 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
3019 list(do_query("tract").order_by("tract.timespan.begin"))
3021 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
3022 list(do_query("tract").order_by("tract.name"))
3024 with self.assertRaisesRegex(
3025 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
3026 ):
3027 list(do_query("visit").order_by("timestamp.begin"))
3029 def testQueryDataIdsGovernorExceptions(self):
3030 """Test exceptions raised by queryDataIds() for incorrect governors."""
3031 registry = self.makeRegistry()
3032 self.loadData(registry, "base.yaml")
3033 self.loadData(registry, "datasets.yaml")
3034 self.loadData(registry, "spatial.yaml")
3036 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
3037 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
3039 Test = namedtuple(
3040 "testQueryDataIdExceptionsTest",
3041 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
3042 defaults=(None, None, None, {}, None, 0),
3043 )
3045 test_data = (
3046 Test("tract,visit", count=6),
3047 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3048 Test(
3049 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
3050 ),
3051 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
3052 Test(
3053 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
3054 ),
3055 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
3056 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
3057 Test(
3058 "tract,visit",
3059 where="instrument=cam AND skymap=map",
3060 bind={"cam": "Cam1", "map": "SkyMap1"},
3061 count=6,
3062 ),
3063 Test(
3064 "tract,visit",
3065 where="instrument=cam AND skymap=map",
3066 bind={"cam": "Cam", "map": "SkyMap"},
3067 exception=DataIdValueError,
3068 ),
3069 )
3071 for test in test_data:
3072 dimensions = test.dimensions.split(",")
3073 if test.exception:
3074 with self.assertRaises(test.exception):
3075 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
3076 else:
3077 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3078 self.assertEqual(query.count(discard=True), test.count)
3080 # and materialize
3081 if test.exception:
3082 with self.assertRaises(test.exception):
3083 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3084 with query.materialize() as materialized:
3085 materialized.count(discard=True)
3086 else:
3087 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
3088 with query.materialize() as materialized:
3089 self.assertEqual(materialized.count(discard=True), test.count)
3091 def testQueryDimensionRecordsOrderBy(self):
3092 """Test order_by and limit on result returned by
3093 queryDimensionRecords().
3094 """
3095 registry = self.makeRegistry()
3096 self.loadData(registry, "base.yaml")
3097 self.loadData(registry, "datasets.yaml")
3098 self.loadData(registry, "spatial.yaml")
3100 def do_query(element, datasets=None, collections=None):
3101 return registry.queryDimensionRecords(
3102 element, instrument="Cam1", datasets=datasets, collections=collections
3103 )
3105 query = do_query("detector")
3106 self.assertEqual(len(list(query)), 4)
3108 Test = namedtuple(
3109 "testQueryDataIdsOrderByTest",
3110 ("element", "order_by", "result", "limit", "datasets", "collections"),
3111 defaults=(None, None, None),
3112 )
3114 test_data = (
3115 Test("detector", "detector", (1, 2, 3, 4)),
3116 Test("detector", "-detector", (4, 3, 2, 1)),
3117 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
3118 Test("detector", "-detector.purpose", (4,), limit=(1,)),
3119 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
3120 Test("visit", "visit", (1, 2)),
3121 Test("visit", "-visit.id", (2, 1)),
3122 Test("visit", "zenith_angle", (1, 2)),
3123 Test("visit", "-visit.name", (2, 1)),
3124 Test("visit", "day_obs,-timespan.begin", (2, 1)),
3125 )
3127 for test in test_data:
3128 order_by = test.order_by.split(",")
3129 query = do_query(test.element).order_by(*order_by)
3130 if test.limit is not None:
3131 query = query.limit(*test.limit)
3132 dataIds = tuple(rec.id for rec in query)
3133 self.assertEqual(dataIds, test.result)
3135 # errors in a name
3136 for order_by in ("", "-"):
3137 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3138 list(do_query("detector").order_by(order_by))
3140 for order_by in ("undimension.name", "-undimension.name"):
3141 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3142 list(do_query("detector").order_by(order_by))
3144 for order_by in ("attract", "-attract"):
3145 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3146 list(do_query("detector").order_by(order_by))
3148 for order_by in ("timestamp.begin", "-timestamp.begin"):
3149 with self.assertRaisesRegex(
3150 ValueError,
3151 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3152 r"perhaps you meant 'timespan.begin'\?",
3153 ):
3154 list(do_query("visit").order_by(order_by))
3156 def testQueryDimensionRecordsExceptions(self):
3157 """Test exceptions raised by queryDimensionRecords()."""
3158 registry = self.makeRegistry()
3159 self.loadData(registry, "base.yaml")
3160 self.loadData(registry, "datasets.yaml")
3161 self.loadData(registry, "spatial.yaml")
3163 result = registry.queryDimensionRecords("detector")
3164 self.assertEqual(result.count(), 4)
3165 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3166 self.assertEqual(result.count(), 4)
3167 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3168 self.assertEqual(result.count(), 4)
3169 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3170 self.assertEqual(result.count(), 4)
3171 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3172 self.assertEqual(result.count(), 4)
3174 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3175 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3176 result.count()
3178 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3179 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3180 result.count()
3182 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3183 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3184 result.count()
3186 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3187 result = registry.queryDimensionRecords(
3188 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3189 )
3190 result.count()
3192 def testDatasetConstrainedDimensionRecordQueries(self):
3193 """Test that queryDimensionRecords works even when given a dataset
3194 constraint whose dimensions extend beyond the requested dimension
3195 element's.
3196 """
3197 registry = self.makeRegistry()
3198 self.loadData(registry, "base.yaml")
3199 self.loadData(registry, "datasets.yaml")
3200 # Query for physical_filter dimension records, using a dataset type
3201 # whose dimensions include physical_filter and detector.
3202 records = registry.queryDimensionRecords(
3203 "physical_filter",
3204 datasets=["flat"],
3205 collections="imported_r",
3206 )
3207 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3208 # Trying to constrain by all dataset types is an error.
3209 with self.assertRaises(TypeError):
3210 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3212 def testSkyPixDatasetQueries(self):
3213 """Test that we can build queries involving skypix dimensions as long
3214 as a dataset type that uses those dimensions is included.
3215 """
3216 registry = self.makeRegistry()
3217 self.loadData(registry, "base.yaml")
3218 dataset_type = DatasetType(
3219 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3220 )
3221 registry.registerDatasetType(dataset_type)
3222 run = "r"
3223 registry.registerRun(run)
3224 # First try queries where there are no datasets; the concern is whether
3225 # we can even build and execute these queries without raising, even
3226 # when "doomed" query shortcuts are in play.
3227 self.assertFalse(
3228 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3229 )
3230 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3231 # Now add a dataset and see that we can get it back.
3232 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3233 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3234 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3235 self.assertEqual(
3236 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3237 {data_id},
3238 )
3239 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
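# A gloss on the data ID construction above, assuming the lsst.sphgeom
# Pixelization API: universe() returns a RangeSet covering every pixel
# index at this level, and indexing it yields (begin, end) pairs, so
# universe()[0][0] is just the first valid htm7 pixel ID.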
3241 def testDatasetIdFactory(self):
3242 """Simple test for DatasetIdFactory, mostly to catch potential changes
3243 in its API.
3244 """
3245 registry = self.makeRegistry()
3246 factory = DatasetIdFactory()
3247 dataset_type = DatasetType(
3248 "datasetType",
3249 dimensions=["detector", "instrument"],
3250 universe=registry.dimensions,
3251 storageClass="int",
3252 )
3253 run = "run"
3254 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
3256 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3257 self.assertIsInstance(datasetId, uuid.UUID)
3258 self.assertEqual(datasetId.version, 4)
3260 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3261 self.assertIsInstance(datasetId, uuid.UUID)
3262 self.assertEqual(datasetId.version, 5)
3264 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3265 self.assertIsInstance(datasetId, uuid.UUID)
3266 self.assertEqual(datasetId.version, 5)
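# The version numbers asserted above encode the generation strategy:
# UNIQUE yields random (version 4) UUIDs, while the DATAID_TYPE and
# DATAID_TYPE_RUN modes yield name-based (version 5) UUIDs, which are
# deterministic. A hypothetical follow-on assertion (not part of the
# original test) illustrating that determinism:
#
# id_a = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
# id_b = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
# assert id_a == id_b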
3268 def testExposureQueries(self):
3269 """Test query methods using arguments sourced from the exposure log
3270 service.
3272 The most complete test dataset currently available to daf_butler tests
3273 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3274 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3275 dimension records as it was focused on providing nontrivial spatial
3276 overlaps between visit+detector and tract+patch. So in this test we
3277 need to translate queries that originally used the exposure dimension
3278 to use the (very similar) visit dimension instead.
3279 """
3280 registry = self.makeRegistry()
3281 self.loadData(registry, "hsc-rc2-subset.yaml")
3282 self.assertEqual(
3283 [
3284 record.id
3285 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3286 .order_by("id")
3287 .limit(5)
3288 ],
3289 [318, 322, 326, 330, 332],
3290 )
3291 self.assertEqual(
3292 [
3293 data_id["visit"]
3294 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3295 ],
3296 [318, 322, 326, 330, 332],
3297 )
3298 self.assertEqual(
3299 [
3300 record.id
3301 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3302 .order_by("full_name")
3303 .limit(5)
3304 ],
3305 [73, 72, 71, 70, 65],
3306 )
3307 self.assertEqual(
3308 [
3309 data_id["detector"]
3310 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3311 .order_by("full_name")
3312 .limit(5)
3313 ],
3314 [73, 72, 71, 70, 65],
3315 )
3317 def test_long_query_names(self) -> None:
3318 """Test that queries involving very long names are handled correctly.
3320 This is especially important for PostgreSQL, which truncates identifiers
3321 longer than 63 characters, but it's worth testing for all DBs.
3322 """
3323 registry = self.makeRegistry()
3324 name = "abcd" * 17
3325 registry.registerDatasetType(
3326 DatasetType(
3327 name,
3328 dimensions=(),
3329 storageClass="Exposure",
3330 universe=registry.dimensions,
3331 )
3332 )
3333 # Need to search more than one collection actually containing a
3334 # matching dataset to avoid optimizations that sidestep bugs due to
3335 # truncation by making findFirst=True a no-op.
3336 run1 = "run1"
3337 registry.registerRun(run1)
3338 run2 = "run2"
3339 registry.registerRun(run2)
3340 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1)
3341 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2)
3342 self.assertEqual(
3343 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3344 {ref1},
3345 )
3347 def test_skypix_constraint_queries(self) -> None:
3348 """Test queries spatially constrained by a skypix data ID."""
3349 registry = self.makeRegistry()
3350 self.loadData(registry, "hsc-rc2-subset.yaml")
3351 patch_regions = {
3352 (data_id["tract"], data_id["patch"]): data_id.region
3353 for data_id in registry.queryDataIds(["patch"]).expanded()
3354 }
3355 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3356 # This check ensures the test doesn't become trivial due to a config
3357 # change; if it does, just pick a different HTM level.
3358 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3359 # Gather all skypix IDs that definitely overlap at least one of these
3360 # patches.
3361 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3362 for patch_region in patch_regions.values():
3363 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3364 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3365 # and does not overlap at least one other patch.
3366 for skypix_id in itertools.chain.from_iterable(
3367 range(begin, end) for begin, end in relevant_skypix_ids
3368 ):
3369 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3370 overlapping_patches = {
3371 patch_key
3372 for patch_key, patch_region in patch_regions.items()
3373 if not patch_region.isDisjointFrom(skypix_region)
3374 }
3375 if overlapping_patches and overlapping_patches != patch_regions.keys():
3376 break
3377 else:
3378 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3379 self.assertEqual(
3380 {
3381 (data_id["tract"], data_id["patch"])
3382 for data_id in registry.queryDataIds(
3383 ["patch"],
3384 dataId={skypix_dimension.name: skypix_id},
3385 )
3386 },
3387 overlapping_patches,
3388 )
3389 # Test that a three-way join that includes the common skypix system in
3390 # the dimensions doesn't generate redundant join terms in the query.
3391 full_data_ids = set(
3392 registry.queryDataIds(
3393 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3394 ).expanded()
3395 )
3396 self.assertGreater(len(full_data_ids), 0)
3397 for data_id in full_data_ids:
3398 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3399 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3401 def test_spatial_constraint_queries(self) -> None:
3402 """Test queries in which one spatial dimension in the constraint (data
3403 ID or ``where`` string) constrains a different spatial dimension in the
3404 query result columns.
3405 """
3406 registry = self.makeRegistry()
3407 self.loadData(registry, "hsc-rc2-subset.yaml")
3408 patch_regions = {
3409 (data_id["tract"], data_id["patch"]): data_id.region
3410 for data_id in registry.queryDataIds(["patch"]).expanded()
3411 }
3412 observation_regions = {
3413 (data_id["visit"], data_id["detector"]): data_id.region
3414 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3415 }
3416 all_combos = {
3417 (patch_key, observation_key)
3418 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3419 }
3420 overlapping_combos = {
3421 (patch_key, observation_key)
3422 for patch_key, observation_key in all_combos
3423 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3424 }
3425 # Check a direct spatial join with no constraint first.
3426 self.assertEqual(
3427 {
3428 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3429 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3430 },
3431 overlapping_combos,
3432 )
3433 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3434 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3435 for patch_key, observation_key in overlapping_combos:
3436 overlaps_by_patch[patch_key].add(observation_key)
3437 overlaps_by_observation[observation_key].add(patch_key)
3438 # Find patches and observations that overlap at least one of the
3439 # others but not all of them.
3440 nontrivial_patch = next(
3441 iter(
3442 patch_key
3443 for patch_key, observation_keys in overlaps_by_patch.items()
3444 if observation_keys and observation_keys != observation_regions.keys()
3445 )
3446 )
3447 nontrivial_observation = next(
3448 iter(
3449 observation_key
3450 for observation_key, patch_keys in overlaps_by_observation.items()
3451 if patch_keys and patch_keys != patch_regions.keys()
3452 )
3453 )
3454 # Use the nontrivial patches and observations as constraints on the
3455 # other dimensions in various ways, first via a 'where' expression.
3456 # It's better in general to use 'bind' instead of f-strings, but these
3457 # are all integers so there are no quoting concerns.
3458 self.assertEqual(
3459 {
3460 (data_id["visit"], data_id["detector"])
3461 for data_id in registry.queryDataIds(
3462 ["visit", "detector"],
3463 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3464 skymap="hsc_rings_v1",
3465 )
3466 },
3467 overlaps_by_patch[nontrivial_patch],
3468 )
3469 self.assertEqual(
3470 {
3471 (data_id["tract"], data_id["patch"])
3472 for data_id in registry.queryDataIds(
3473 ["patch"],
3474 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3475 instrument="HSC",
3476 )
3477 },
3478 overlaps_by_observation[nontrivial_observation],
3479 )
3480 # and then via the dataId argument.
3481 self.assertEqual(
3482 {
3483 (data_id["visit"], data_id["detector"])
3484 for data_id in registry.queryDataIds(
3485 ["visit", "detector"],
3486 dataId={
3487 "tract": nontrivial_patch[0],
3488 "patch": nontrivial_patch[1],
3489 },
3490 skymap="hsc_rings_v1",
3491 )
3492 },
3493 overlaps_by_patch[nontrivial_patch],
3494 )
3495 self.assertEqual(
3496 {
3497 (data_id["tract"], data_id["patch"])
3498 for data_id in registry.queryDataIds(
3499 ["patch"],
3500 dataId={
3501 "visit": nontrivial_observation[0],
3502 "detector": nontrivial_observation[1],
3503 },
3504 instrument="HSC",
3505 )
3506 },
3507 overlaps_by_observation[nontrivial_observation],
3508 )
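# For reference, the 'bind' form recommended above would look like this
# for the first of these queries (my_tract/my_patch are hypothetical
# parameter names):
#
# registry.queryDataIds(
#     ["visit", "detector"],
#     where="tract = my_tract AND patch = my_patch",
#     bind={"my_tract": nontrivial_patch[0], "my_patch": nontrivial_patch[1]},
#     skymap="hsc_rings_v1",
# )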
3510 def test_query_projection_drop_postprocessing(self) -> None:
3511 """Test that projections and deduplications on query objects can
3512 drop post-query region filtering to ensure the query remains in
3513 the SQL engine.
3514 """
3515 registry = self.makeRegistry()
3516 self.loadData(registry, "base.yaml")
3517 self.loadData(registry, "spatial.yaml")
3519 def pop_transfer(tree: Relation) -> Relation:
3520 """If a relation tree terminates with a transfer to a new engine,
3521 return the relation prior to that transfer. If not, return the
3522 original relation.
3523 """
3524 match tree:
3525 case Transfer(target=target):
3526 return target
3527 case _:
3528 return tree
3530 # There's no public way to get a Query object yet, so we get one from a
3531 # DataCoordinateQueryResults private attribute. When a public API is
3532 # available this test should use it.
3533 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3534 # We expect this query to terminate in the iteration engine originally,
3535 # because region-filtering is necessary.
3536 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3537 # If we deduplicate, we usually have to do that downstream of the
3538 # filtering. That means the deduplication has to happen in the
3539 # iteration engine.
3540 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3541 # If we pass drop_postprocessing, we instead drop the region filtering
3542 # so the deduplication can happen in SQL (though there might still be
3543 # transfer to iteration at the tail of the tree that we can ignore;
3544 # that's what the pop_transfer takes care of here).
3545 self.assertIsInstance(
3546 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3547 sql.Engine,
3548 )
3550 def test_query_find_datasets_drop_postprocessing(self) -> None:
3551 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3552 problems with the FindFirstDataset relation operation.
3553 """
3554 # Setup: load some visit, tract, and patch records, and insert two
3555 # datasets with dimensions {visit, patch}, with one in each of two
3556 # RUN collections.
3557 registry = self.makeRegistry()
3558 self.loadData(registry, "base.yaml")
3559 self.loadData(registry, "spatial.yaml")
3560 storage_class = StorageClass("Warpy")
3561 registry.storageClasses.registerStorageClass(storage_class)
3562 dataset_type = DatasetType(
3563 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3564 )
3565 registry.registerDatasetType(dataset_type)
3566 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3567 registry.registerRun("run1")
3568 registry.registerRun("run2")
3569 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3570 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3571 # Query for the dataset using queryDataIds(...).findDatasets(...)
3572 # against only one of the two collections. This should work even
3573 # though the relation returned by queryDataIds ends with
3574 # iteration-engine region-filtering, because we can recognize before
3575 running the query that there is only one collection to search and
3576 # hence the (default) findFirst=True is irrelevant, and joining in the
3577 # dataset query commutes past the iteration-engine postprocessing.
3578 query1 = registry.queryDataIds(
3579 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3580 )
3581 self.assertEqual(
3582 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3583 {ref1},
3584 )
3585 # Query for the dataset using queryDataIds(...).findDatasets(...)
3586 # against both collections. This can only work if the FindFirstDataset
3587 operation can be commuted past the iteration-engine operations into SQL.
3588 query2 = registry.queryDataIds(
3589 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3590 )
3591 self.assertEqual(
3592 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3593 {ref2},
3594 )
3596 def test_query_empty_collections(self) -> None:
3597 """Test for registry query methods with empty collections. The methods
3598 should return an empty result set (or None when applicable) and provide
3599 "doomed" diagnostics.
3600 """
3601 registry = self.makeRegistry()
3602 self.loadData(registry, "base.yaml")
3603 self.loadData(registry, "datasets.yaml")
3605 # Tests for registry.findDataset()
3606 with self.assertRaises(NoDefaultCollectionError):
3607 registry.findDataset("bias", instrument="Cam1", detector=1)
3608 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3609 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3611 # Tests for registry.queryDatasets()
3612 with self.assertRaises(NoDefaultCollectionError):
3613 registry.queryDatasets("bias")
3614 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3616 result = registry.queryDatasets("bias", collections=[])
3617 self.assertEqual(len(list(result)), 0)
3618 messages = list(result.explain_no_results())
3619 self.assertTrue(messages)
3620 self.assertTrue(any("because collection list is empty" in message for message in messages))
3622 # Tests for registry.queryDataIds()
3623 with self.assertRaises(NoDefaultCollectionError):
3624 registry.queryDataIds("detector", datasets="bias")
3625 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3627 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3628 self.assertEqual(len(list(result)), 0)
3629 messages = list(result.explain_no_results())
3630 self.assertTrue(messages)
3631 self.assertTrue(any("because collection list is empty" in message for message in messages))
3633 # Tests for registry.queryDimensionRecords()
3634 with self.assertRaises(NoDefaultCollectionError):
3635 registry.queryDimensionRecords("detector", datasets="bias")
3636 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3638 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3639 self.assertEqual(len(list(result)), 0)
3640 messages = list(result.explain_no_results())
3641 self.assertTrue(messages)
3642 self.assertTrue(any("because collection list is empty" in message for message in messages))
3644 def test_dataset_followup_spatial_joins(self) -> None:
3645 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3646 is involved.
3647 """
3648 registry = self.makeRegistry()
3649 self.loadData(registry, "base.yaml")
3650 self.loadData(registry, "spatial.yaml")
3651 pvi_dataset_type = DatasetType(
3652 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3653 )
3654 registry.registerDatasetType(pvi_dataset_type)
3655 collection = "datasets"
3656 registry.registerRun(collection)
3657 (pvi1,) = registry.insertDatasets(
3658 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3659 )
3660 (pvi2,) = registry.insertDatasets(
3661 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3662 )
3663 (pvi3,) = registry.insertDatasets(
3664 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3665 )
3666 self.assertEqual(
3667 set(
3668 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3669 .expanded()
3670 .findRelatedDatasets("pvi", [collection])
3671 ),
3672 {
3673 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3674 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3675 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3676 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3677 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3678 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3679 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3680 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3681 },
3682 )