# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

from ... import ddl

__all__ = ["RegistryTests"]

import datetime
import itertools
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from collections.abc import Iterator
from datetime import timedelta
from typing import TYPE_CHECKING

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ..._dataset_association import DatasetAssociation
from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
from ..._dataset_type import DatasetType
from ..._storage_class import StorageClass
from ..._timespan import Timespan
from ...dimensions import DataCoordinate, DataCoordinateSet, SkyPixDimension
from .._collection_summary import CollectionSummary
from .._collection_type import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    DatasetTypeExpressionError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from ..sql_registry import SqlRegistry


class RegistryTests(ABC):
    """Generic tests for the `SqlRegistry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: str | None = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: str | dict[str, str] | None = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need a default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
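
    # A hypothetical subclass could select a different manager implementation
    # by overriding the class members above. A sketch (the manager class path
    # is illustrative, not a recommendation):
    #
    #     class NameKeyCollectionsRegistryTests(RegistryTests, unittest.TestCase):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
    #         )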

    @abstractmethod
    def makeRegistry(self, share_repo_with: SqlRegistry | None = None) -> SqlRegistry | None:
        """Return the SqlRegistry instance to be tested.

        Parameters
        ----------
        share_repo_with : `SqlRegistry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `SqlRegistry`
            New `SqlRegistry` instance, or `None` *only* if `share_repo_with`
            is not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
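
    # A minimal concrete subclass might look like this sketch; the SQLite URI
    # and the `createFromConfig` call are assumptions for illustration, not a
    # prescription:
    #
    #     class InMemorySqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # cannot share an in-memory SQLite repo
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return SqlRegistry.createFromConfig(config)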

    def loadData(self, registry: SqlRegistry, filename: str) -> None:
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.

        Parameters
        ----------
        registry : `SqlRegistry`
            The registry to load into.
        filename : `str`
            The name of the file to load.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename)) as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
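
    # The YAML files used here are butler export files; tests typically chain
    # them, e.g. (a usage sketch mirroring the tests below):
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")      # dimension records and dataset types
    #     self.loadData(registry, "datasets.yaml")  # datasets in imported_g/imported_r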

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
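
    # Usage sketch: pair a lazy query with the values it should produce.
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected_data_ids,
    #     )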

    def testOpaque(self):
        """Tests for `SqlRegistry.registerOpaqueTable`,
        `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
        `SqlRegistry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite says the limit is 32k, but it looks
        # like it is much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
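        # (The registry is expected to split such long IN clauses into
        # smaller batches internally, so the query stays within the
        # database's bound-parameter limits; the exact batch size is an
        # implementation detail.)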
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `SqlRegistry.registerDatasetType` and
        `SqlRegistry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        differentDimensions = registry.dimensions.conform(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.conform(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `SqlRegistry.insertDimensionData`,
        `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", dimensions=dimension.minimal_group)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, dimensions=dimension.minimal_group)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i", dimensions=dimension2.minimal_group
            )
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `SqlRegistry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1},
        )
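        # visit 1 is defined to contain only exposure 1, so a data ID that
        # claims visit=1 together with exposure=2 is self-inconsistent and
        # must be rejected.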
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `SqlRegistry.insertDatasets`,
        `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `SqlRegistry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that SqlRegistry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that SqlRegistry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(DatasetTypeError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `SqlRegistry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict) and not self.datasetsManager["cls"].endswith(
            ".ByDimensionsDatasetRecordStorageManagerUUID"
        ):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])
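
                # (Both modes yield deterministic version-5 UUIDs. DATAID_TYPE
                # hashes only the dataset type and data ID, while
                # DATAID_TYPE_RUN also folds in the run name, which is why
                # only the latter can be imported into a different run.)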

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        Components can no longer be found by the registry; this test checks
        that such lookups now fail.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        with self.assertRaises(DatasetTypeError):
            registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should also be found via
        # chain2, whose first child is run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that `SqlRegistry.setCollectionChain` obeys its 'flatten'
        option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, dimensions=dimension.minimal_group))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, dimensions=dimension.minimal_group)
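
    # The semantics exercised above, as a sketch (SomeError is a
    # placeholder):
    #
    #     with registry.transaction():              # outer transaction
    #         ...                                   # survives the inner failure
    #         try:
    #             with registry.transaction(savepoint=True):
    #                 ...                           # rolled back to the savepoint
    #         except SomeError:
    #             pass                              # outer work is still committed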

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit=10),
            dict(instrument="DummyCam", exposure=101, visit=10),
            dict(instrument="DummyCam", exposure=110, visit=11),
            dict(instrument="DummyCam", exposure=111, visit=11),
            dict(instrument="DummyCam", exposure=200, visit=20),
            dict(instrument="DummyCam", exposure=201, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # while 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = registry.dimensions.conform(
            rawType.dimensions.required.names | calexpType.dimensions.required.names
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the requested
        # dimensions, but it is a part of the full expression, so it should
        # work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
        self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
        self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash=b"sha!"))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.conform(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = registry.dimensions.conform(
            calexpType.dimensions.required.names
            | mergeType.dimensions.required.names
            | measType.dimensions.required.names
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.dimensions.required, ("skymap", "tract", "patch", "band"))
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual({dataId["tract"] for dataId in rows}, (1, 3, 5))
        self.assertCountEqual({dataId["patch"] for dataId in rows}, (2, 4, 6, 7))
        self.assertCountEqual({dataId["band"] for dataId in rows}, ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.database_elements:
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                dimensions = element1.minimal_group | element2.minimal_group
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.required, **dataId2.required}, dimensions=dimensions
                    )
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(dimensions))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.required}, dimensions=dimensions
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)
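
    # (queryDataIds is expected to insert the spatial-overlap join on its own
    # whenever the requested dimensions span more than one topological
    # family; the brute-force region comparison above serves as the oracle
    # that the join is validated against.)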

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager."""
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 2 records per manager, plus the config for
        # dimensions.
        VERSION_COUNT = 6 * 2 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
1256 registry = self.makeRegistry()
1257 self.loadData(registry, "base.yaml")
1258 self.loadData(registry, "datasets.yaml")
1259 self.assertCountEqual(
1260 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1261 [
1262 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1263 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1264 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1265 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1266 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1267 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1268 ],
1269 )
1270 self.assertCountEqual(
1271 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1272 [
1273 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1274 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1275 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1276 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1277 ],
1278 )
1279 self.assertCountEqual(
1280 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1281 [
1282 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1283 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1284 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1285 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1286 ],
1287 )
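# Hedged sketch restating the behavior tested above: with findFirst=True
# each data ID resolves to the dataset from the first collection in the
# search path that has one, so reversing the collection order changes
# which run a shared data ID comes from (detector=2 exists in both runs).
self.assertNotEqual(
registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
)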
1289 def testQueryResults(self):
1290 """Test querying for data IDs and then manipulating the QueryResults
1291 object returned to perform other queries.
1292 """
1293 registry = self.makeRegistry()
1294 self.loadData(registry, "base.yaml")
1295 self.loadData(registry, "datasets.yaml")
1296 bias = registry.getDatasetType("bias")
1297 flat = registry.getDatasetType("flat")
1298 # Obtain expected results from methods other than those we're testing
1299 # here. That includes:
1300 # - the dimensions of the data IDs we want to query:
1301 expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
1302 # - the dimensions of some other data IDs we'll extract from that:
1303 expected_subset_dimensions = registry.dimensions.conform(["detector"])
1304 # - the data IDs we expect to obtain from the first queries:
1305 expectedDataIds = DataCoordinateSet(
1306 {
1307 DataCoordinate.standardize(
1308 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1309 )
1310 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1311 },
1312 dimensions=expected_dimensions,
1313 hasFull=False,
1314 hasRecords=False,
1315 )
1316 # - the flat datasets we expect to find from those data IDs, in just
1317 # one collection (so deduplication is irrelevant):
1318 expectedFlats = [
1319 registry.findDataset(
1320 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1321 ),
1322 registry.findDataset(
1323 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1324 ),
1325 registry.findDataset(
1326 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1327 ),
1328 ]
1329 # - the data IDs we expect to extract from that:
1330 expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
1331 # - the bias datasets we expect to find from those data IDs, after we
1332 # subset out the physical_filter dimension, first with duplicates:
1333 expectedAllBiases = [
1334 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1335 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1336 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1337 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1338 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1339 ]
1340 # - ...and without duplicates:
1341 expectedDeduplicatedBiases = [
1342 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1343 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1344 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1345 ]
1346 # Test against those expected results, using a "lazy" query for the
1347 # data IDs (which re-executes that query each time we use it to do
1348 # something new).
1349 dataIds = registry.queryDataIds(
1350 ["detector", "physical_filter"],
1351 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1352 instrument="Cam1",
1353 )
1354 self.assertEqual(dataIds.dimensions, expected_dimensions)
1355 self.assertEqual(dataIds.toSet(), expectedDataIds)
1356 self.assertCountEqual(
1357 list(
1358 dataIds.findDatasets(
1359 flat,
1360 collections=["imported_r"],
1361 )
1362 ),
1363 expectedFlats,
1364 )
1365 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1366 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1367 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1368 self.assertCountEqual(
1369 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1370 expectedAllBiases,
1371 )
1372 self.assertCountEqual(
1373 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1374 expectedDeduplicatedBiases,
1375 )
1377 # Searching for a dataset with dimensions we had projected away
1378 # restores those dimensions.
1379 self.assertCountEqual(
1380 list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
1381 expectedFlats,
1382 )
1384 # Use a named dataset type that does not exist and a dataset type
1385 # object that does not exist.
1386 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1388 # Test both string name and dataset type object.
1389 test_type: str | DatasetType
1390 for test_type, test_type_name in (
1391 (unknown_type, unknown_type.name),
1392 (unknown_type.name, unknown_type.name),
1393 ):
1394 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1395 list(
1396 subsetDataIds.findDatasets(
1397 test_type, collections=["imported_r", "imported_g"], findFirst=True
1398 )
1399 )
1401 # Materialize the bias dataset queries (only) by putting the results
1402 # into temporary tables, then repeat those tests.
1403 with subsetDataIds.findDatasets(
1404 bias, collections=["imported_r", "imported_g"], findFirst=False
1405 ).materialize() as biases:
1406 self.assertCountEqual(list(biases), expectedAllBiases)
1407 with subsetDataIds.findDatasets(
1408 bias, collections=["imported_r", "imported_g"], findFirst=True
1409 ).materialize() as biases:
1410 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1411 # Materialize the data ID subset query, but not the dataset queries.
1412 with subsetDataIds.materialize() as subsetDataIds:
1413 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1414 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1415 self.assertCountEqual(
1416 list(
1417 subsetDataIds.findDatasets(
1418 bias, collections=["imported_r", "imported_g"], findFirst=False
1419 )
1420 ),
1421 expectedAllBiases,
1422 )
1423 self.assertCountEqual(
1424 list(
1425 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1426 ),
1427 expectedDeduplicatedBiases,
1428 )
1429 # Materialize the dataset queries, too.
1430 with subsetDataIds.findDatasets(
1431 bias, collections=["imported_r", "imported_g"], findFirst=False
1432 ).materialize() as biases:
1433 self.assertCountEqual(list(biases), expectedAllBiases)
1434 with subsetDataIds.findDatasets(
1435 bias, collections=["imported_r", "imported_g"], findFirst=True
1436 ).materialize() as biases:
1437 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1438 # Materialize the original query, but none of the follow-up queries.
1439 with dataIds.materialize() as dataIds:
1440 self.assertEqual(dataIds.dimensions, expected_dimensions)
1441 self.assertEqual(dataIds.toSet(), expectedDataIds)
1442 self.assertCountEqual(
1443 list(
1444 dataIds.findDatasets(
1445 flat,
1446 collections=["imported_r"],
1447 )
1448 ),
1449 expectedFlats,
1450 )
1451 subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
1452 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1453 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1454 self.assertCountEqual(
1455 list(
1456 subsetDataIds.findDatasets(
1457 bias, collections=["imported_r", "imported_g"], findFirst=False
1458 )
1459 ),
1460 expectedAllBiases,
1461 )
1462 self.assertCountEqual(
1463 list(
1464 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1465 ),
1466 expectedDeduplicatedBiases,
1467 )
1468 # Materialize just the bias dataset queries.
1469 with subsetDataIds.findDatasets(
1470 bias, collections=["imported_r", "imported_g"], findFirst=False
1471 ).materialize() as biases:
1472 self.assertCountEqual(list(biases), expectedAllBiases)
1473 with subsetDataIds.findDatasets(
1474 bias, collections=["imported_r", "imported_g"], findFirst=True
1475 ).materialize() as biases:
1476 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1477 # Materialize the subset data ID query, but not the dataset
1478 # queries.
1479 with subsetDataIds.materialize() as subsetDataIds:
1480 self.assertEqual(subsetDataIds.dimensions, expected_subset_dimensions)
1481 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1482 self.assertCountEqual(
1483 list(
1484 subsetDataIds.findDatasets(
1485 bias, collections=["imported_r", "imported_g"], findFirst=False
1486 )
1487 ),
1488 expectedAllBiases,
1489 )
1490 self.assertCountEqual(
1491 list(
1492 subsetDataIds.findDatasets(
1493 bias, collections=["imported_r", "imported_g"], findFirst=True
1494 )
1495 ),
1496 expectedDeduplicatedBiases,
1497 )
1498 # Materialize the bias dataset queries, too, so now we're
1499 # materializing every single step.
1500 with subsetDataIds.findDatasets(
1501 bias, collections=["imported_r", "imported_g"], findFirst=False
1502 ).materialize() as biases:
1503 self.assertCountEqual(list(biases), expectedAllBiases)
1504 with subsetDataIds.findDatasets(
1505 bias, collections=["imported_r", "imported_g"], findFirst=True
1506 ).materialize() as biases:
1507 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
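# Minimal usage sketch of the lazy-vs-materialized contrast exercised
# above: a QueryResults object re-executes its query each time it is used,
# while materialize() snapshots the rows into a temporary table for the
# duration of the context block; both views must agree on their contents.
lazy = registry.queryDataIds(["detector"], instrument="Cam1")
with lazy.materialize() as snapshot:
self.assertEqual(lazy.toSet(), snapshot.toSet())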
1509 def testStorageClassPropagation(self):
1510 """Test that queries for datasets respect the storage class passed in
1511 as part of a full dataset type.
1512 """
1513 registry = self.makeRegistry()
1514 self.loadData(registry, "base.yaml")
1515 dataset_type_in_registry = DatasetType(
1516 "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
1517 )
1518 registry.registerDatasetType(dataset_type_in_registry)
1519 run = "run1"
1520 registry.registerRun(run)
1521 (inserted_ref,) = registry.insertDatasets(
1522 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1523 )
1524 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1525 query_dataset_type = DatasetType(
1526 "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
1527 )
1528 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1529 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1530 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1531 (query_datasets_ref,) = query_datasets_result
1532 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1533 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1534 query_dataset_type, collections=[run]
1535 )
1536 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1537 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1538 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1539 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1540 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1541 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1542 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
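# Hedged recap of the propagation contract verified above: the registry
# stores "tbl" with the "Packages" storage class, but a query made with a
# compatible DatasetType carrying a different storage class yields refs
# that report the requested storage class, not the registered one.
(override_ref,) = registry.queryDatasets(query_dataset_type, collections=[run])
self.assertEqual(override_ref.datasetType, query_dataset_type)
self.assertNotEqual(override_ref.datasetType, dataset_type_in_registry)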
1544 def testEmptyDimensionsQueries(self):
1545 """Test Query and QueryResults objects in the case where there are no
1546 dimensions.
1547 """
1548 # Set up test data: one dataset type, two runs, one dataset in each.
1549 registry = self.makeRegistry()
1550 self.loadData(registry, "base.yaml")
1551 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1552 registry.registerDatasetType(schema)
1553 dataId = DataCoordinate.make_empty(registry.dimensions)
1554 run1 = "run1"
1555 run2 = "run2"
1556 registry.registerRun(run1)
1557 registry.registerRun(run2)
1558 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1559 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1560 # Query directly for both of the datasets, and each one, one at a time.
1561 self.checkQueryResults(
1562 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1563 )
1564 self.checkQueryResults(
1565 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1566 [dataset1],
1567 )
1568 self.checkQueryResults(
1569 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1570 [dataset2],
1571 )
1572 # Query for data IDs with no dimensions.
1573 dataIds = registry.queryDataIds([])
1574 self.checkQueryResults(dataIds, [dataId])
1575 # Use queried data IDs to find the datasets.
1576 self.checkQueryResults(
1577 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1578 [dataset1, dataset2],
1579 )
1580 self.checkQueryResults(
1581 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1582 [dataset1],
1583 )
1584 self.checkQueryResults(
1585 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1586 [dataset2],
1587 )
1588 # Now materialize the data ID query results and repeat those tests.
1589 with dataIds.materialize() as dataIds:
1590 self.checkQueryResults(dataIds, [dataId])
1591 self.checkQueryResults(
1592 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1593 [dataset1],
1594 )
1595 self.checkQueryResults(
1596 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1597 [dataset2],
1598 )
1599 # Query for non-empty data IDs, then subset that to get the empty one.
1600 # Repeat the above tests starting from that.
1601 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1602 self.checkQueryResults(dataIds, [dataId])
1603 self.checkQueryResults(
1604 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1605 [dataset1, dataset2],
1606 )
1607 self.checkQueryResults(
1608 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1609 [dataset1],
1610 )
1611 self.checkQueryResults(
1612 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1613 [dataset2],
1614 )
1615 with dataIds.materialize() as dataIds:
1616 self.checkQueryResults(dataIds, [dataId])
1617 self.checkQueryResults(
1618 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1619 [dataset1, dataset2],
1620 )
1621 self.checkQueryResults(
1622 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1623 [dataset1],
1624 )
1625 self.checkQueryResults(
1626 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1627 [dataset2],
1628 )
1629 # Query for non-empty data IDs, then materialize, then subset to get
1630 # the empty one. Repeat again.
1631 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1632 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1633 self.checkQueryResults(dataIds, [dataId])
1634 self.checkQueryResults(
1635 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1636 [dataset1, dataset2],
1637 )
1638 self.checkQueryResults(
1639 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1640 [dataset1],
1641 )
1642 self.checkQueryResults(
1643 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1644 [dataset2],
1645 )
1646 with dataIds.materialize() as dataIds:
1647 self.checkQueryResults(dataIds, [dataId])
1648 self.checkQueryResults(
1649 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1650 [dataset1, dataset2],
1651 )
1652 self.checkQueryResults(
1653 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1654 [dataset1],
1655 )
1656 self.checkQueryResults(
1657 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1658 [dataset2],
1659 )
1660 # Repeat the materialization tests with a dimension element that isn't
1661 # cached, so there's no way we can know when building the query whether
1662 # there are any rows or not (there aren't).
1663 dataIds = registry.queryDataIds(["exposure"]).subset(registry.dimensions.empty, unique=True)
1664 with dataIds.materialize() as dataIds:
1665 self.checkQueryResults(dataIds, [])
1666 self.checkQueryResults(
1667 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False), []
1668 )
1669 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True), [])
1670 self.checkQueryResults(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True), [])
1671 # Query for non-empty data IDs with a constraint on an empty-data-ID
1672 # dataset that exists.
1673 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1674 self.checkQueryResults(
1675 dataIds.subset(unique=True),
1676 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1677 )
1678 # Again query for non-empty data IDs with a constraint on empty-data-ID
1679 # datasets, but when the datasets don't exist. We delete the existing
1680 # dataset and query just that collection rather than creating a new
1681 # empty collection because this is a bit less likely for our build-time
1682 # logic to short-circuit (via the collection summaries), and such a
1683 # shortcut would make this test a bit more trivial than we'd like.
1684 registry.removeDatasets([dataset2])
1685 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1686 self.checkQueryResults(dataIds, [])
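# Sketch of the invariant behind all of the checks above: there is exactly
# one DataCoordinate with empty dimensions, so any empty-dimensions query
# can yield at most that single row.
self.assertEqual(dataId, DataCoordinate.make_empty(registry.dimensions))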
1688 def testDimensionDataModifications(self):
1689 """Test that modifying dimension records via:
1690 syncDimensionData(..., update=True) and
1691 insertDimensionData(..., replace=True) works as expected, even in the
1692 presence of datasets using those dimensions and spatial overlap
1693 relationships.
1694 """
1696 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1697 """Unpack a sphgeom.RangeSet into the integers it contains."""
1698 for begin, end in ranges:
1699 yield from range(begin, end)
1701 def _range_set_hull(
1702 ranges: lsst.sphgeom.RangeSet,
1703 pixelization: lsst.sphgeom.HtmPixelization,
1704 ) -> lsst.sphgeom.ConvexPolygon:
1705 """Create a ConvexPolygon hull of the region defined by a set of
1706 HTM pixelization index ranges.
1707 """
1708 points = []
1709 for index in _unpack_range_set(ranges):
1710 points.extend(pixelization.triangle(index).getVertices())
1711 return lsst.sphgeom.ConvexPolygon(points)
1713 # Use HTM to set up an initial parent region (one arbitrary trixel)
1714 # and four child regions (the trixels within the parent at the next
1715 # level). We'll use the parent as a tract/visit region and the children
1716 # as its patch/visit_detector regions.
1717 registry = self.makeRegistry()
1718 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1719 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1720 index = 12288
1721 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1722 assert htm6.universe().contains(child_ranges_small)
1723 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
1724 parent_region_small = lsst.sphgeom.ConvexPolygon(
1725 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1726 )
1727 assert all(parent_region_small.contains(c) for c in child_regions_small)
1728 # Make a larger version of each child region, defined to be the set of
1729 # htm6 trixels that overlap the original's bounding circle. Make a new
1730 # parent that's the convex hull of the new children.
1731 child_regions_large = [
1732 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1733 ]
1734 assert all(
1735 large.contains(small)
1736 for large, small in zip(child_regions_large, child_regions_small, strict=True)
1737 )
1738 parent_region_large = lsst.sphgeom.ConvexPolygon(
1739 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1740 )
1741 assert all(parent_region_large.contains(c) for c in child_regions_large)
1742 assert parent_region_large.contains(parent_region_small)
1743 assert not parent_region_small.contains(parent_region_large)
1744 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1745 # Find some commonSkyPix indices that overlap the large regions but do
1746 # not overlap the small regions. We use commonSkyPix here to make sure the
1747 # real tests later involve what's in the database, not just post-query
1748 # filtering of regions.
1749 child_difference_indices = []
1750 for large, small in zip(child_regions_large, child_regions_small, strict=True):
1751 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1752 assert difference, "if this is empty, we can't test anything useful with these regions"
1753 assert all(
1754 not commonSkyPix.triangle(d).isDisjointFrom(large)
1755 and commonSkyPix.triangle(d).isDisjointFrom(small)
1756 for d in difference
1757 )
1758 child_difference_indices.append(difference)
1759 parent_difference_indices = list(
1760 _unpack_range_set(
1761 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1762 )
1763 )
1764 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1765 assert all(
1766 (
1767 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1768 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1769 )
1770 for d in parent_difference_indices
1771 )
1772 # Now that we've finally got those regions, we'll insert the large ones
1773 # as tract/patch dimension records.
1774 skymap_name = "testing_v1"
1775 registry.insertDimensionData(
1776 "skymap",
1777 {
1778 "name": skymap_name,
1779 "hash": bytes([42]),
1780 "tract_max": 1,
1781 "patch_nx_max": 2,
1782 "patch_ny_max": 2,
1783 },
1784 )
1785 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1786 registry.insertDimensionData(
1787 "patch",
1788 *[
1789 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1790 for n, c in enumerate(child_regions_large)
1791 ],
1792 )
1793 # Add a dataset that uses these dimensions to make sure that modifying
1794 # them doesn't disrupt foreign keys (need to make sure the DB doesn't
1795 # implement insert with replace=True as delete-then-insert).
1796 dataset_type = DatasetType(
1797 "coadd",
1798 dimensions=["tract", "patch"],
1799 universe=registry.dimensions,
1800 storageClass="Exposure",
1801 )
1802 registry.registerDatasetType(dataset_type)
1803 registry.registerCollection("the_run", CollectionType.RUN)
1804 registry.insertDatasets(
1805 dataset_type,
1806 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1807 run="the_run",
1808 )
1809 # Query for tracts and patches that overlap some "difference"
1810 # commonSkyPix pixels; there should be overlaps, because the database
1811 # has the "large" suite of regions.
1812 self.assertEqual(
1813 {0},
1814 {
1815 data_id["tract"]
1816 for data_id in registry.queryDataIds(
1817 ["tract"],
1818 skymap=skymap_name,
1819 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1820 )
1821 },
1822 )
1823 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1824 self.assertIn(
1825 patch_id,
1826 {
1827 data_id["patch"]
1828 for data_id in registry.queryDataIds(
1829 ["patch"],
1830 skymap=skymap_name,
1831 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1832 )
1833 },
1834 )
1835 # Use sync to update the tract region and insert to update the regions
1836 # of the patches, to the "small" suite.
1837 updated = registry.syncDimensionData(
1838 "tract",
1839 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1840 update=True,
1841 )
1842 self.assertEqual(updated, {"region": parent_region_large})
1843 registry.insertDimensionData(
1844 "patch",
1845 *[
1846 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1847 for n, c in enumerate(child_regions_small)
1848 ],
1849 replace=True,
1850 )
1851 # Query again; there now should be no such overlaps, because the
1852 # database has the "small" suite of regions.
1853 self.assertFalse(
1854 set(
1855 registry.queryDataIds(
1856 ["tract"],
1857 skymap=skymap_name,
1858 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1859 )
1860 )
1861 )
1862 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1863 self.assertNotIn(
1864 patch_id,
1865 {
1866 data_id["patch"]
1867 for data_id in registry.queryDataIds(
1868 ["patch"],
1869 skymap=skymap_name,
1870 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1871 )
1872 },
1873 )
1874 # Update back to the large regions and query one more time.
1875 updated = registry.syncDimensionData(
1876 "tract",
1877 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1878 update=True,
1879 )
1880 self.assertEqual(updated, {"region": parent_region_small})
1881 registry.insertDimensionData(
1882 "patch",
1883 *[
1884 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1885 for n, c in enumerate(child_regions_large)
1886 ],
1887 replace=True,
1888 )
1889 self.assertEqual(
1890 {0},
1891 {
1892 data_id["tract"]
1893 for data_id in registry.queryDataIds(
1894 ["tract"],
1895 skymap=skymap_name,
1896 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1897 )
1898 },
1899 )
1900 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1901 self.assertIn(
1902 patch_id,
1903 {
1904 data_id["patch"]
1905 for data_id in registry.queryDataIds(
1906 ["patch"],
1907 skymap=skymap_name,
1908 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1909 )
1910 },
1911 )
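# Hedged sketch of the sync return convention relied on above (assuming
# the documented behavior): update=True returns a dict mapping any changed
# fields to their *old* values, and a call that finds the record already
# up to date reports that nothing was inserted or changed.
unchanged = registry.syncDimensionData(
"tract",
{"skymap": skymap_name, "id": 0, "region": parent_region_large},
update=True,
)
self.assertFalse(unchanged)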
1913 def testCalibrationCollections(self):
1914 """Test operations on `~CollectionType.CALIBRATION` collections,
1915 including `SqlRegistry.certify`, `SqlRegistry.decertify`,
1916 `SqlRegistry.findDataset`, and
1917 `DataCoordinateQueryResults.findRelatedDatasets`.
1918 """
1919 # Setup - make a Registry, fill it with some datasets in
1920 # non-calibration collections.
1921 registry = self.makeRegistry()
1922 self.loadData(registry, "base.yaml")
1923 self.loadData(registry, "datasets.yaml")
1924 # Set up some timestamps.
1925 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
1926 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
1927 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
1928 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
1929 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
1930 allTimespans = [
1931 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1932 ]
1933 # Insert some exposure records with timespans between each sequential
1934 # pair of those.
1935 registry.insertDimensionData(
1936 "exposure",
1937 {
1938 "instrument": "Cam1",
1939 "id": 0,
1940 "obs_id": "zero",
1941 "physical_filter": "Cam1-G",
1942 "timespan": Timespan(t1, t2),
1943 },
1944 {
1945 "instrument": "Cam1",
1946 "id": 1,
1947 "obs_id": "one",
1948 "physical_filter": "Cam1-G",
1949 "timespan": Timespan(t2, t3),
1950 },
1951 {
1952 "instrument": "Cam1",
1953 "id": 2,
1954 "obs_id": "two",
1955 "physical_filter": "Cam1-G",
1956 "timespan": Timespan(t3, t4),
1957 },
1958 {
1959 "instrument": "Cam1",
1960 "id": 3,
1961 "obs_id": "three",
1962 "physical_filter": "Cam1-G",
1963 "timespan": Timespan(t4, t5),
1964 },
1965 )
1966 # Get references to some datasets.
1967 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1968 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1969 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1970 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1971 # Register the main calibration collection we'll be working with.
1972 collection = "Cam1/calibs/default"
1973 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1974 # Cannot associate into a calibration collection (no timespan).
1975 with self.assertRaises(CollectionTypeError):
1976 registry.associate(collection, [bias2a])
1977 # Certify 2a dataset with [t2, t4) validity.
1978 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1979 # Test that we can query for this dataset via the new collection, both
1980 # on its own and with a RUN collection.
1981 self.assertEqual(
1982 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
1983 {bias2a},
1984 )
1985 self.assertEqual(
1986 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
1987 {
1988 bias2a,
1989 bias2b,
1990 bias3b,
1991 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1992 },
1993 )
1994 self.assertEqual(
1995 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
1996 {registry.expandDataId(instrument="Cam1", detector=2)},
1997 )
1998 self.assertEqual(
1999 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2000 {
2001 registry.expandDataId(instrument="Cam1", detector=2),
2002 registry.expandDataId(instrument="Cam1", detector=3),
2003 registry.expandDataId(instrument="Cam1", detector=4),
2004 },
2005 )
2006 self.assertEqual(
2007 set(
2008 registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
2009 "bias", findFirst=True, collections=[collection]
2010 )
2011 ),
2012 {
2013 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2014 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2015 },
2016 )
2017 self.assertEqual(
2018 set(
2019 registry.queryDataIds(
2020 ["exposure", "detector"], instrument="Cam1", detector=2
2021 ).findRelatedDatasets("bias", findFirst=True, collections=[collection, "imported_r"])
2022 ),
2023 {
2024 (registry.expandDataId(instrument="Cam1", detector=2, exposure=1), bias2a),
2025 (registry.expandDataId(instrument="Cam1", detector=2, exposure=2), bias2a),
2026 (registry.expandDataId(instrument="Cam1", detector=2, exposure=0), bias2b),
2027 (registry.expandDataId(instrument="Cam1", detector=2, exposure=3), bias2b),
2028 },
2029 )
2031 # We should not be able to certify 2b with anything overlapping that
2032 # window.
2033 with self.assertRaises(ConflictingDefinitionError):
2034 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2035 with self.assertRaises(ConflictingDefinitionError):
2036 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2037 with self.assertRaises(ConflictingDefinitionError):
2038 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2039 with self.assertRaises(ConflictingDefinitionError):
2040 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2041 with self.assertRaises(ConflictingDefinitionError):
2042 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2043 with self.assertRaises(ConflictingDefinitionError):
2044 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2045 with self.assertRaises(ConflictingDefinitionError):
2046 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2047 with self.assertRaises(ConflictingDefinitionError):
2048 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2049 # We should be able to certify 3a with a range overlapping that window,
2050 # because it's for a different detector.
2051 # We'll certify 3a over [t1, t3).
2052 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2053 # Now we'll certify 2b and 3b together over [t4, ∞).
2054 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2056 # Fetch all associations and check that they are what we expect.
2057 self.assertCountEqual(
2058 list(
2059 registry.queryDatasetAssociations(
2060 "bias",
2061 collections=[collection, "imported_g", "imported_r"],
2062 )
2063 ),
2064 [
2065 DatasetAssociation(
2066 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2067 collection="imported_g",
2068 timespan=None,
2069 ),
2070 DatasetAssociation(
2071 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2072 collection="imported_r",
2073 timespan=None,
2074 ),
2075 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2076 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2077 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2078 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2079 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2080 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2081 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2082 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2083 ],
2084 )
2086 class Ambiguous:
2087 """Tag class to denote lookups that should be ambiguous."""
2089 pass
2091 def _assertLookup(
2092 detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None
2093 ) -> None:
2094 """Local function that asserts that a bias lookup returns the given
2095 expected result.
2096 """
2097 if expected is Ambiguous:
2098 with self.assertRaises((DatasetTypeError, LookupError)):
2099 registry.findDataset(
2100 "bias",
2101 collections=collection,
2102 instrument="Cam1",
2103 detector=detector,
2104 timespan=timespan,
2105 )
2106 else:
2107 self.assertEqual(
2108 expected,
2109 registry.findDataset(
2110 "bias",
2111 collections=collection,
2112 instrument="Cam1",
2113 detector=detector,
2114 timespan=timespan,
2115 ),
2116 )
2118 # Systematically test lookups against expected results.
2119 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2120 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2121 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2122 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2123 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2124 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2125 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2126 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2127 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2128 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2129 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2130 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2131 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2132 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2133 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2134 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2135 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2136 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2137 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2138 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2139 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2140 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2141 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2142 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2143 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2144 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2145 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2146 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2147 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2148 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2149 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2150 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2151 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2152 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2153 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2154 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2155 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2156 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2157 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2158 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2159 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2160 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2162 # Test lookups via temporal joins to exposures.
2163 self.assertEqual(
2164 set(
2165 registry.queryDataIds(
2166 ["exposure", "detector"], instrument="Cam1", detector=2
2167 ).findRelatedDatasets("bias", collections=[collection])
2168 ),
2169 {
2170 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2171 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2172 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2173 },
2174 )
2175 self.assertEqual(
2176 set(
2177 registry.queryDataIds(
2178 ["exposure", "detector"], instrument="Cam1", detector=3
2179 ).findRelatedDatasets("bias", collections=[collection])
2180 ),
2181 {
2182 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2183 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2184 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2185 },
2186 )
2187 self.assertEqual(
2188 set(
2189 registry.queryDataIds(
2190 ["exposure", "detector"], instrument="Cam1", detector=2
2191 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2192 ),
2193 {
2194 (registry.expandDataId(instrument="Cam1", exposure=0, detector=2), bias2a),
2195 (registry.expandDataId(instrument="Cam1", exposure=1, detector=2), bias2a),
2196 (registry.expandDataId(instrument="Cam1", exposure=2, detector=2), bias2a),
2197 (registry.expandDataId(instrument="Cam1", exposure=3, detector=2), bias2b),
2198 },
2199 )
2200 self.assertEqual(
2201 set(
2202 registry.queryDataIds(
2203 ["exposure", "detector"], instrument="Cam1", detector=3
2204 ).findRelatedDatasets("bias", collections=[collection, "imported_g"])
2205 ),
2206 {
2207 (registry.expandDataId(instrument="Cam1", exposure=0, detector=3), bias3a),
2208 (registry.expandDataId(instrument="Cam1", exposure=1, detector=3), bias3a),
2209 (registry.expandDataId(instrument="Cam1", exposure=2, detector=3), bias3a),
2210 (registry.expandDataId(instrument="Cam1", exposure=3, detector=3), bias3b),
2211 },
2212 )
2214 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2215 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2216 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2217 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2218 _assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2219 _assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2220 _assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2221 _assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2222 _assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2223 _assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2224 _assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2225 _assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2226 _assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2227 _assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2228 _assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2229 _assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2230 _assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2231 _assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2232 _assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2233 _assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2234 _assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2235 _assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2236 _assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2237 _assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2238 _assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2239 _assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2240 _assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2241 _assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2242 _assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2243 _assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2244 _assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2245 _assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2246 _assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2247 _assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2248 _assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2249 _assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2250 _assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2251 _assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2252 _assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2253 _assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2254 _assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2255 _assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2256 _assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2257 _assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2258 _assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2259 _assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2261 # Decertify everything, this time with explicit data IDs, then check
2262 # that no lookups succeed.
2263 registry.decertify(
2264 collection,
2265 "bias",
2266 Timespan(None, None),
2267 dataIds=[
2268 dict(instrument="Cam1", detector=2),
2269 dict(instrument="Cam1", detector=3),
2270 ],
2271 )
2272 for detector in (2, 3):
2273 for timespan in allTimespans:
2274 _assertLookup(detector=detector, timespan=timespan, expected=None)
2275 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2276 # those.
2277 registry.certify(
2278 collection,
2279 [bias2a, bias3a],
2280 Timespan(None, None),
2281 )
2282 for timespan in allTimespans:
2283 _assertLookup(detector=2, timespan=timespan, expected=bias2a)
2284 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2285 # Decertify just the detector=2 bias over [t2, t4).
2286 # This should split a single certification row into two (and leave the
2287 # other existing row, for bias3a, alone).
2288 registry.decertify(
2289 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2290 )
2291 for timespan in allTimespans:
2292 _assertLookup(detector=3, timespan=timespan, expected=bias3a)
2293 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2294 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2295 if overlapsBefore and overlapsAfter:
2296 expected = Ambiguous
2297 elif overlapsBefore or overlapsAfter:
2298 expected = bias2a
2299 else:
2300 expected = None
2301 _assertLookup(detector=2, timespan=timespan, expected=expected)
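# Compact restatement of the lookup rule _assertLookup encodes: a
# timespan-qualified findDataset returns the unique dataset whose validity
# range overlaps the timespan, None when nothing overlaps, and raises when
# the overlap is ambiguous. In the final state, detector=3 is certified
# over all time, so any probe resolves to bias3a.
_assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)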
2303 def testSkipCalibs(self):
2304 """Test how queries handle skipping of calibration collections."""
2305 registry = self.makeRegistry()
2306 self.loadData(registry, "base.yaml")
2307 self.loadData(registry, "datasets.yaml")
2309 coll_calib = "Cam1/calibs/default"
2310 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2312 # Add all biases to the calibration collection.
2313 # Without this, the logic that prunes dataset subqueries based on
2314 # datasetType-collection summary information will fire before the logic
2315 # we want to test below. This is a good thing (it avoids the dreaded
2316 # NotImplementedError a bit more often) everywhere but here.
2317 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2319 coll_list = [coll_calib, "imported_g", "imported_r"]
2320 chain = "Cam1/chain"
2321 registry.registerCollection(chain, type=CollectionType.CHAINED)
2322 registry.setCollectionChain(chain, coll_list)
2324 # an explicit collection list will raise if findFirst=True or there
2325 # are temporal dimensions
2326 with self.assertRaises(NotImplementedError):
2327 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2328 with self.assertRaises(NotImplementedError):
2329 registry.queryDataIds(
2330 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2331 ).count()
2333 # chain will skip
2334 datasets = list(registry.queryDatasets("bias", collections=chain))
2335 self.assertGreater(len(datasets), 0)
2337 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2338 self.assertGreater(len(dataIds), 0)
2340 # glob will skip too
2341 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2342 self.assertGreater(len(datasets), 0)
2344 # regular expression will skip too
2345 pattern = re.compile(".*")
2346 datasets = list(registry.queryDatasets("bias", collections=pattern))
2347 self.assertGreater(len(datasets), 0)
2349 # ellipsis should work as usual
2350 datasets = list(registry.queryDatasets("bias", collections=...))
2351 self.assertGreater(len(datasets), 0)
2353 # a few tests with findFirst
2354 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2355 self.assertGreater(len(datasets), 0)
2357 def testIngestTimeQuery(self):
2358 registry = self.makeRegistry()
2359 self.loadData(registry, "base.yaml")
2360 dt0 = datetime.datetime.now(datetime.UTC)
2361 self.loadData(registry, "datasets.yaml")
2362 dt1 = datetime.datetime.now(datetime.UTC)
2364 datasets = list(registry.queryDatasets(..., collections=...))
2365 len0 = len(datasets)
2366 self.assertGreater(len0, 0)
2368 where = "ingest_date > T'2000-01-01'"
2369 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2370 len1 = len(datasets)
2371 self.assertEqual(len0, len1)
2373 # no one will ever use this piece of software in 30 years
2374 where = "ingest_date > T'2050-01-01'"
2375 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2376 len2 = len(datasets)
2377 self.assertEqual(len2, 0)
2379 # Check more exact timing to make sure there is no 37-second offset
2380 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2381 # sure that we don't test with higher precision.
2382 tests = [
2383 # format: (timestamp, operator, expected_len)
2384 (dt0 - timedelta(seconds=1), ">", len0),
2385 (dt0 - timedelta(seconds=1), "<", 0),
2386 (dt1 + timedelta(seconds=1), "<", len0),
2387 (dt1 + timedelta(seconds=1), ">", 0),
2388 ]
2389 for dt, op, expect_len in tests:
2390 dt_str = dt.isoformat(sep=" ")
2392 where = f"ingest_date {op} T'{dt_str}'"
2393 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2394 self.assertEqual(len(datasets), expect_len)
2396 # same with bind using datetime or astropy Time
2397 where = f"ingest_date {op} ingest_time"
2398 datasets = list(
2399 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2400 )
2401 self.assertEqual(len(datasets), expect_len)
2403 dt_astropy = astropy.time.Time(dt, format="datetime")
2404 datasets = list(
2405 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2406 )
2407 self.assertEqual(len(datasets), expect_len)
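# Minimal bind-parameter sketch matching the pattern above: values in
# ``bind`` are substituted for bare identifiers in ``where``, which avoids
# interpolating timestamps into the expression string by hand. Every
# dataset was ingested before dt1 + 1s, so this matches all of them.
where = "ingest_date < ingest_time"
bound = {"ingest_time": dt1 + timedelta(seconds=1)}
self.assertEqual(len(list(registry.queryDatasets(..., collections=..., where=where, bind=bound))), len0)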
2409 def testTimespanQueries(self):
2410 """Test query expressions involving timespans."""
2411 registry = self.makeRegistry()
2412 self.loadData(registry, "hsc-rc2-subset.yaml")
2413 # All visits in the database; mapping from ID to timespan.
2414 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2415 # Just those IDs, sorted (which is also temporal sorting, because HSC
2416 # visit IDs are monotonically increasing).
2417 ids = sorted(visits.keys())
2418 self.assertGreater(len(ids), 20)
2419 # Pick some quasi-random indexes into `ids` to play with.
2420 i1 = int(len(ids) * 0.1)
2421 i2 = int(len(ids) * 0.3)
2422 i3 = int(len(ids) * 0.6)
2423 i4 = int(len(ids) * 0.8)
2424 # Extract some times from those: just before the beginning of i1 (which
2425 # should be after the end of the previous visit), exactly the
2426 # beginning of i2, just after the beginning of i3 (and before its end),
2427 # and the exact end of i4.
2428 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2429 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2430 t2 = visits[ids[i2]].begin
2431 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2432 self.assertLess(t3, visits[ids[i3]].end)
2433 t4 = visits[ids[i4]].end
2434 # Make sure those are actually in order.
2435 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2437 bind = {
2438 "t1": t1,
2439 "t2": t2,
2440 "t3": t3,
2441 "t4": t4,
2442 "ts23": Timespan(t2, t3),
2443 }
2445 def query(where):
2446 """Return results as a sorted, deduplicated list of visit IDs.
2448 Parameters
2449 ----------
2450 where : `str`
2451 The WHERE clause for the query.
2452 """
2453 return sorted(
2454 {
2455 dataId["visit"]
2456 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2457 }
2458 )
2460 # Try a bunch of timespan queries, mixing up the bounds themselves,
2461 # where they appear in the expression, and how we get the timespan into
2462 # the expression.
2464 # t1 is before the start of i1, so this should not include i1.
2465 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2466 # t2 is exactly at the start of i2, but ends are exclusive, so these
2467 # should not include i2.
2468 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2469 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2470 # t3 is in the middle of i3, so this should include i3.
2471 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2472 # This one should not include i3 by the same reasoning.
2473 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2474 # t4 is exactly at the end of i4, so this should include i4.
2475 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2476 # i4's upper bound of t4 is exclusive, so this should not include i4.
2477 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2479 # Now some timespan vs. time scalar queries.
2480 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2481 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2482 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2483 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2484 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2485 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2487 # Empty timespans should not overlap anything.
2488 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
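# The half-open convention behind all of the boundary cases above: a
# Timespan includes its begin bound and excludes its end bound, so
# adjacent timespans sharing an endpoint do not overlap.
self.assertFalse(Timespan(t1, t2).overlaps(Timespan(t2, t3)))
self.assertTrue(Timespan(t1, t3).overlaps(Timespan(t2, t4)))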
2490 def testCollectionSummaries(self):
2491 """Test recording and retrieval of collection summaries."""
2492 self.maxDiff = None
2493 registry = self.makeRegistry()
2494 # Importing datasets from yaml should go through the code path where
2495 # we update collection summaries as we insert datasets.
2496 self.loadData(registry, "base.yaml")
2497 self.loadData(registry, "datasets.yaml")
2498 flat = registry.getDatasetType("flat")
2499 expected1 = CollectionSummary()
2500 expected1.dataset_types.add(registry.getDatasetType("bias"))
2501 expected1.add_data_ids(
2502 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2503 )
2504 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2505 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2506 # Create a chained collection with both of the imported runs; the
2507 # summary should be the same, because it's a union with itself.
2508 chain = "chain"
2509 registry.registerCollection(chain, CollectionType.CHAINED)
2510 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2511 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2512 # Associate flats only into a tagged collection and a calibration
2513 # collection to check summaries of those.
2514 tag = "tag"
2515 registry.registerCollection(tag, CollectionType.TAGGED)
2516 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2517 calibs = "calibs"
2518 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2519 registry.certify(
2520 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2521 )
2522 expected2 = expected1.copy()
2523 expected2.dataset_types.discard("bias")
2524 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2525 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2526 # Explicitly calling SqlRegistry.refresh() should load those same
2527 # summaries, via a totally different code path.
2528 registry.refresh()
2529 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2530 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2531 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2532 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
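# Hedged illustration of why these summaries matter: the tagged collection
# only ever received flats, so its summary no longer advertises "bias" and
# a bias query against it can be pruned without scanning dataset tables.
self.assertFalse(set(registry.queryDatasets("bias", collections=[tag])))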
2534 def testBindInQueryDatasets(self):
2535 """Test that the bind parameter is correctly forwarded in
2536 queryDatasets recursion.
2537 """
2538 registry = self.makeRegistry()
2539 # Load the standard test datasets so there is something for the
2540 # queries below to match.
2541 self.loadData(registry, "base.yaml")
2542 self.loadData(registry, "datasets.yaml")
2543 self.assertEqual(
2544 set(registry.queryDatasets("flat", band="r", collections=...)),
2545 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2546 )
2548 def testQueryIntRangeExpressions(self):
2549 """Test integer range expressions in ``where`` arguments.
2551 Note that our expressions use inclusive stop values, unlike Python's.
2552 """
2553 registry = self.makeRegistry()
2554 self.loadData(registry, "base.yaml")
2555 self.assertEqual(
2556 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2557 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2558 )
2559 self.assertEqual(
2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2562 )
2563 self.assertEqual(
2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2566 )
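# Where the inclusive stop differs from Python: "2..4:2" above matched
# detectors {2, 4}, while Python's exclusive-stop equivalent stops short.
self.assertEqual(list(range(2, 4, 2)), [2])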
2568 def testQueryResultSummaries(self):
2569 """Test summary methods like `count`, `any`, and `explain_no_results`
2570 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2571 """
2572 registry = self.makeRegistry()
2573 self.loadData(registry, "base.yaml")
2574 self.loadData(registry, "datasets.yaml")
2575 self.loadData(registry, "spatial.yaml")
2576 # The default test dataset has two collections, each with both flats and
2577 # biases. Add a new collection with only biases.
2578 registry.registerCollection("biases", CollectionType.TAGGED)
2579 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2580 # First query yields two results, and involves no postprocessing.
2581 query1 = registry.queryDataIds(["physical_filter"], band="r")
2582 self.assertTrue(query1.any(execute=False, exact=False))
2583 self.assertTrue(query1.any(execute=True, exact=False))
2584 self.assertTrue(query1.any(execute=True, exact=True))
2585 self.assertEqual(query1.count(exact=False), 2)
2586 self.assertEqual(query1.count(exact=True), 2)
2587 self.assertFalse(list(query1.explain_no_results()))
2588 # Second query should yield no results, which we should see when
2589 # we attempt to expand the data ID.
2590 query2 = registry.queryDataIds(["physical_filter"], band="h")
2591 # There's no execute=False, exact=False test here because the behavior
2592 # is not something we want to guarantee in this case (and exact=False
2593 # says either answer is legal).
2594 self.assertFalse(query2.any(execute=True, exact=False))
2595 self.assertFalse(query2.any(execute=True, exact=True))
2596 self.assertEqual(query2.count(exact=False), 0)
2597 self.assertEqual(query2.count(exact=True), 0)
2598 self.assertTrue(list(query2.explain_no_results()))
2599 # These queries yield no results due to various problems that can be
2600 # spotted prior to execution, yielding helpful diagnostics.
2601 base_query = registry.queryDataIds(["detector", "physical_filter"])
2602 queries_and_snippets = [
2603 (
2604 # Dataset type name doesn't match any existing dataset types.
2605 registry.queryDatasets("nonexistent", collections=...),
2606 ["nonexistent"],
2607 ),
2608 (
2609 # Dataset type object isn't registered.
2610 registry.queryDatasets(
2611 DatasetType(
2612 "nonexistent",
2613 dimensions=["instrument"],
2614 universe=registry.dimensions,
2615 storageClass="Image",
2616 ),
2617 collections=...,
2618 ),
2619 ["nonexistent"],
2620 ),
2621 (
2622 # No datasets of this type in this collection.
2623 registry.queryDatasets("flat", collections=["biases"]),
2624 ["flat", "biases"],
2625 ),
2626 (
2627 # No datasets of this type in this collection.
2628 base_query.findDatasets("flat", collections=["biases"]),
2629 ["flat", "biases"],
2630 ),
2631 (
2632 # No collections matching at all.
2633 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2634 ["potato"],
2635 ),
2636 ]
2637 with self.assertRaises(MissingDatasetTypeError):
2638 # Dataset type name doesn't match any existing dataset types.
2639 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...)
2640 with self.assertRaises(MissingDatasetTypeError):
2641 # Dataset type name doesn't match any existing dataset types.
2642 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...)
2643 for query, snippets in queries_and_snippets:
2644 self.assertFalse(query.any(execute=False, exact=False))
2645 self.assertFalse(query.any(execute=True, exact=False))
2646 self.assertFalse(query.any(execute=True, exact=True))
2647 self.assertEqual(query.count(exact=False), 0)
2648 self.assertEqual(query.count(exact=True), 0)
2649 messages = list(query.explain_no_results())
2650 self.assertTrue(messages)
2651 # Want all expected snippets to appear in at least one message.
2652 self.assertTrue(
2653 any(
2654 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2655 ),
2656 messages,
2657 )
2659 # Wildcards on dataset types are not permitted in queryDataIds.
2660 with self.assertRaises(DatasetTypeExpressionError):
2661 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2663 # These queries yield no results due to problems that can be identified
2664 # by cheap follow-up queries, yielding helpful diagnostics.
2665 for query, snippets in [
2666 (
2667 # No records for one of the involved dimensions.
2668 registry.queryDataIds(["subfilter"]),
2669 ["no rows", "subfilter"],
2670 ),
2671 (
2672 # No records for one of the involved dimensions.
2673 registry.queryDimensionRecords("subfilter"),
2674 ["no rows", "subfilter"],
2675 ),
2676 ]:
2677 self.assertFalse(query.any(execute=True, exact=False))
2678 self.assertFalse(query.any(execute=True, exact=True))
2679 self.assertEqual(query.count(exact=True), 0)
2680 messages = list(query.explain_no_results())
2681 self.assertTrue(messages)
2682 # Want all expected snippets to appear in at least one message.
2683 self.assertTrue(
2684 any(
2685 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2686 ),
2687 messages,
2688 )
2690 # This query yields four overlaps in the database, but one is filtered
2691 # out in postprocessing. The count queries aren't accurate because
2692 # they don't account for duplication that happens due to an internal
2693 # join against commonSkyPix.
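# (Before deduplication, a visit-tract pair can appear once per
# commonSkyPix pixel that their regions share, inflating inexact
# counts.)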
2694 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2695 self.assertEqual(
2696 {
2697 DataCoordinate.standardize(
2698 instrument="Cam1",
2699 skymap="SkyMap1",
2700 visit=v,
2701 tract=t,
2702 universe=registry.dimensions,
2703 )
2704 for v, t in [(1, 0), (2, 0), (2, 1)]
2705 },
2706 set(query3),
2707 )
2708 self.assertTrue(query3.any(execute=False, exact=False))
2709 self.assertTrue(query3.any(execute=True, exact=False))
2710 self.assertTrue(query3.any(execute=True, exact=True))
2711 self.assertGreaterEqual(query3.count(exact=False), 4)
2712 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2713 self.assertFalse(list(query3.explain_no_results()))
2714 # This query yields overlaps in the database, but all are filtered
2715 # out in postprocessing. The count queries again aren't very useful.
2716 # We have to use `where=` here to avoid an optimization that
2717 # (currently) skips the spatial postprocess-filtering because it
2718 # recognizes that no spatial join is necessary. That's not ideal, but
2719 # fixing it is out of scope for this ticket.
2720 query4 = registry.queryDataIds(
2721 ["visit", "tract"],
2722 instrument="Cam1",
2723 skymap="SkyMap1",
2724 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2725 )
2726 self.assertFalse(set(query4))
2727 self.assertTrue(query4.any(execute=False, exact=False))
2728 self.assertTrue(query4.any(execute=True, exact=False))
2729 self.assertFalse(query4.any(execute=True, exact=True))
2730 self.assertGreaterEqual(query4.count(exact=False), 1)
2731 self.assertEqual(query4.count(exact=True, discard=True), 0)
2732 messages = query4.explain_no_results()
2733 self.assertTrue(messages)
2734 self.assertTrue(any("overlap" in message for message in messages))
2735 # This query should yield results from one dataset type but not the
2736 # other, which is not registered.
2737 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2738 self.assertTrue(set(query5))
2739 self.assertTrue(query5.any(execute=False, exact=False))
2740 self.assertTrue(query5.any(execute=True, exact=False))
2741 self.assertTrue(query5.any(execute=True, exact=True))
2742 self.assertGreaterEqual(query5.count(exact=False), 1)
2743 self.assertGreaterEqual(query5.count(exact=True), 1)
2744 self.assertFalse(list(query5.explain_no_results()))
2745 # This query applies a selection that yields no results, fully in the
2746 # database. Explaining why it fails involves traversing the relation
2747 # tree and running a LIMIT 1 query at each level that has the potential
2748 # to remove rows.
2749 query6 = registry.queryDimensionRecords(
2750 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2751 )
2752 self.assertEqual(query6.count(exact=True), 0)
2753 messages = query6.explain_no_results()
2754 self.assertTrue(messages)
2755 self.assertTrue(any("no-purpose" in message for message in messages))
2757 def testQueryDataIdsExpressionError(self):
2758 """Test error checking of 'where' expressions in queryDataIds."""
2759 registry = self.makeRegistry()
2760 self.loadData(registry, "base.yaml")
2761 bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
2762 with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
2763 registry.queryDataIds(["detector"], where="foo.bar = 12")
2764 with self.assertRaisesRegex(
2765 LookupError, "Dimension element name cannot be inferred in this context."
2766 ):
2767 registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)
2769 def testQueryDataIdsOrderBy(self):
2770 """Test order_by and limit on result returned by queryDataIds()."""
2771 registry = self.makeRegistry()
2772 self.loadData(registry, "base.yaml")
2773 self.loadData(registry, "datasets.yaml")
2774 self.loadData(registry, "spatial.yaml")
2776 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2777 return registry.queryDataIds(
2778 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2779 )
2781 Test = namedtuple(
2782 "testQueryDataIdsOrderByTest",
2783 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2784 defaults=(None, None, None),
2785 )
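# Each Test row reads: sort do_query(keys) by order_by and expect the
# given tuples of data ID values; e.g. the first row sorts (tract,
# visit) data IDs by "tract,visit" and expects (0, 1) twice, (0, 2)
# twice, then (1, 2) twice.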
2787 test_data = (
2788 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2789 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2790 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2791 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2792 Test(
2793 "tract.id,visit.id",
2794 "tract,visit",
2795 ((0, 1), (0, 1), (0, 2)),
2796 limit=(3,),
2797 ),
2798 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2799 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2800 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2801 Test(
2802 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2803 ),
2804 Test(
2805 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2806 ),
2807 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2808 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2809 Test(
2810 "tract,-timespan.begin,timespan.end",
2811 "tract,visit",
2812 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2813 ),
2814 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2815 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2816 Test(
2817 "tract,detector",
2818 "tract,detector",
2819 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2820 datasets="flat",
2821 collections="imported_r",
2822 ),
2823 Test(
2824 "tract,detector.full_name",
2825 "tract,detector",
2826 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2827 datasets="flat",
2828 collections="imported_r",
2829 ),
2830 Test(
2831 "tract,detector.raft,detector.name_in_raft",
2832 "tract,detector",
2833 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2834 datasets="flat",
2835 collections="imported_r",
2836 ),
2837 )
2839 for test in test_data:
2840 order_by = test.order_by.split(",")
2841 keys = test.keys.split(",")
2842 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2843 if test.limit is not None:
2844 query = query.limit(*test.limit)
2845 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2846 self.assertEqual(dataIds, test.result)
2848 # Materializing a query with order_by/limit applied is unsupported and should raise.
2849 query = do_query(keys).order_by(*order_by)
2850 if test.limit is not None:
2851 query = query.limit(*test.limit)
2852 with self.assertRaises(RelationalAlgebraError):
2853 with query.materialize():
2854 pass
2856 # Errors from malformed or unknown names in ORDER BY clauses.
2857 for order_by in ("", "-"):
2858 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2859 list(do_query().order_by(order_by))
2861 for order_by in ("undimension.name", "-undimension.name"):
2862 with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
2863 list(do_query().order_by(order_by))
2865 for order_by in ("attract", "-attract"):
2866 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2867 list(do_query().order_by(order_by))
2869 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2870 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2872 with self.assertRaisesRegex(
2873 ValueError,
2874 r"Timespan exists in more than one dimension element \(exposure, visit\); "
2875 r"qualify timespan with specific dimension name\.",
2876 ):
2877 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2879 with self.assertRaisesRegex(
2880 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2881 ):
2882 list(do_query("tract").order_by("timespan.begin"))
2884 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2885 list(do_query("tract").order_by("tract.timespan.begin"))
2887 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2888 list(do_query("tract").order_by("tract.name"))
2890 with self.assertRaisesRegex(
2891 ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
2892 ):
2893 list(do_query("visit").order_by("timestamp.begin"))
2895 def testQueryDataIdsGovernorExceptions(self):
2896 """Test exceptions raised by queryDataIds() for incorrect governors."""
2897 registry = self.makeRegistry()
2898 self.loadData(registry, "base.yaml")
2899 self.loadData(registry, "datasets.yaml")
2900 self.loadData(registry, "spatial.yaml")
2902 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
2903 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2905 Test = namedtuple(
2906 "testQueryDataIdExceptionsTest",
2907 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2908 defaults=(None, None, None, {}, None, 0),
2909 )
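# namedtuple defaults bind to the trailing fields, so each Test row
# defaults to dataId=None, where=None, bind=None, kwargs={},
# exception=None, and count=0 unless overridden.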
2911 test_data = (
2912 Test("tract,visit", count=6),
2913 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2914 Test(
2915 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2916 ),
2917 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2918 Test(
2919 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2920 ),
2921 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2922 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2923 Test(
2924 "tract,visit",
2925 where="instrument=cam AND skymap=map",
2926 bind={"cam": "Cam1", "map": "SkyMap1"},
2927 count=6,
2928 ),
2929 Test(
2930 "tract,visit",
2931 where="instrument=cam AND skymap=map",
2932 bind={"cam": "Cam", "map": "SkyMap"},
2933 exception=DataIdValueError,
2934 ),
2935 )
2937 for test in test_data:
2938 dimensions = test.dimensions.split(",")
2939 if test.exception:
2940 with self.assertRaises(test.exception):
2941 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2942 else:
2943 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2944 self.assertEqual(query.count(discard=True), test.count)
2946 # Repeat the same checks on materialized results.
2947 if test.exception:
2948 with self.assertRaises(test.exception):
2949 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2950 with query.materialize() as materialized:
2951 materialized.count(discard=True)
2952 else:
2953 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2954 with query.materialize() as materialized:
2955 self.assertEqual(materialized.count(discard=True), test.count)
2957 def testQueryDimensionRecordsOrderBy(self):
2958 """Test order_by and limit on result returned by
2959 queryDimensionRecords().
2960 """
2961 registry = self.makeRegistry()
2962 self.loadData(registry, "base.yaml")
2963 self.loadData(registry, "datasets.yaml")
2964 self.loadData(registry, "spatial.yaml")
2966 def do_query(element, datasets=None, collections=None):
2967 return registry.queryDimensionRecords(
2968 element, instrument="Cam1", datasets=datasets, collections=collections
2969 )
2971 query = do_query("detector")
2972 self.assertEqual(len(list(query)), 4)
2974 Test = namedtuple(
2975 "testQueryDataIdsOrderByTest",
2976 ("element", "order_by", "result", "limit", "datasets", "collections"),
2977 defaults=(None, None, None),
2978 )
2980 test_data = (
2981 Test("detector", "detector", (1, 2, 3, 4)),
2982 Test("detector", "-detector", (4, 3, 2, 1)),
2983 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2984 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2985 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2986 Test("visit", "visit", (1, 2)),
2987 Test("visit", "-visit.id", (2, 1)),
2988 Test("visit", "zenith_angle", (1, 2)),
2989 Test("visit", "-visit.name", (2, 1)),
2990 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2991 )
2993 for test in test_data:
2994 order_by = test.order_by.split(",")
2995 query = do_query(test.element).order_by(*order_by)
2996 if test.limit is not None:
2997 query = query.limit(*test.limit)
2998 dataIds = tuple(rec.id for rec in query)
2999 self.assertEqual(dataIds, test.result)
3001 # Errors from malformed or unknown names in ORDER BY clauses.
3002 for order_by in ("", "-"):
3003 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3004 list(do_query("detector").order_by(order_by))
3006 for order_by in ("undimension.name", "-undimension.name"):
3007 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3008 list(do_query("detector").order_by(order_by))
3010 for order_by in ("attract", "-attract"):
3011 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3012 list(do_query("detector").order_by(order_by))
3014 for order_by in ("timestamp.begin", "-timestamp.begin"):
3015 with self.assertRaisesRegex(
3016 ValueError,
3017 r"Element name mismatch: 'timestamp' instead of 'visit'; "
3018 r"perhaps you meant 'timespan.begin'\?",
3019 ):
3020 list(do_query("visit").order_by(order_by))
3022 def testQueryDimensionRecordsExceptions(self):
3023 """Test exceptions raised by queryDimensionRecords()."""
3024 registry = self.makeRegistry()
3025 self.loadData(registry, "base.yaml")
3026 self.loadData(registry, "datasets.yaml")
3027 self.loadData(registry, "spatial.yaml")
3029 result = registry.queryDimensionRecords("detector")
3030 self.assertEqual(result.count(), 4)
3031 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3032 self.assertEqual(result.count(), 4)
3033 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3034 self.assertEqual(result.count(), 4)
3035 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3036 self.assertEqual(result.count(), 4)
3037 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3038 self.assertEqual(result.count(), 4)
3040 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3041 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3042 result.count()
3044 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3045 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3046 result.count()
3048 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3049 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3050 result.count()
3052 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3053 result = registry.queryDimensionRecords(
3054 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3055 )
3056 result.count()
3058 def testDatasetConstrainedDimensionRecordQueries(self):
3059 """Test that queryDimensionRecords works even when given a dataset
3060 constraint whose dimensions extend beyond the requested dimension
3061 element's.
3062 """
3063 registry = self.makeRegistry()
3064 self.loadData(registry, "base.yaml")
3065 self.loadData(registry, "datasets.yaml")
3066 # Query for physical_filter dimension records, using a dataset type
3067 # whose dimensions include physical_filter as well as others.
3068 records = registry.queryDimensionRecords(
3069 "physical_filter",
3070 datasets=["flat"],
3071 collections="imported_r",
3072 )
3073 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3074 # Trying to constrain by all dataset types is an error.
3075 with self.assertRaises(TypeError):
3076 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3078 def testSkyPixDatasetQueries(self):
3079 """Test that we can build queries involving skypix dimensions as long
3080 as a dataset type that uses those dimensions is included.
3081 """
3082 registry = self.makeRegistry()
3083 self.loadData(registry, "base.yaml")
3084 dataset_type = DatasetType(
3085 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3086 )
3087 registry.registerDatasetType(dataset_type)
3088 run = "r"
3089 registry.registerRun(run)
3090 # First try queries where there are no datasets; the concern is whether
3091 # we can even build and execute these queries without raising, even
3092 # when "doomed" query shortcuts are in play.
3093 self.assertFalse(
3094 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3095 )
3096 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3097 # Now add a dataset and see that we can get it back.
3098 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3099 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3100 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3101 self.assertEqual(
3102 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3103 {data_id},
3104 )
3105 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3107 def testDatasetIdFactory(self):
3108 """Simple test for DatasetIdFactory, mostly to catch potential changes
3109 in its API.
3110 """
3111 registry = self.makeRegistry()
3112 factory = DatasetIdFactory()
3113 dataset_type = DatasetType(
3114 "datasetType",
3115 dimensions=["detector", "instrument"],
3116 universe=registry.dimensions,
3117 storageClass="int",
3118 )
3119 run = "run"
3120 data_id = DataCoordinate.standardize(
3121 instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
3122 )
3124 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3125 self.assertIsInstance(datasetId, uuid.UUID)
3126 self.assertEqual(datasetId.version, 4)
3128 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3129 self.assertIsInstance(datasetId, uuid.UUID)
3130 self.assertEqual(datasetId.version, 5)
3132 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3133 self.assertIsInstance(datasetId, uuid.UUID)
3134 self.assertEqual(datasetId.version, 5)
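# A sketch of a further property (not asserted in the original test):
# the UUID5-based modes are deterministic, so repeating the call with
# identical inputs should reproduce the same ID.
self.assertEqual(
datasetId,
factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN),
)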
3136 def testExposureQueries(self):
3137 """Test query methods using arguments sourced from the exposure log
3138 service.
3140 The most complete test dataset currently available to daf_butler tests
3141 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3142 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3143 dimension records as it was focused on providing nontrivial spatial
3144 overlaps between visit+detector and tract+patch. So in this test we
3145 need to translate queries that originally used the exposure dimension
3146 to use the (very similar) visit dimension instead.
3147 """
3148 registry = self.makeRegistry()
3149 self.loadData(registry, "hsc-rc2-subset.yaml")
3150 self.assertEqual(
3151 [
3152 record.id
3153 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3154 .order_by("id")
3155 .limit(5)
3156 ],
3157 [318, 322, 326, 330, 332],
3158 )
3159 self.assertEqual(
3160 [
3161 data_id["visit"]
3162 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3163 ],
3164 [318, 322, 326, 330, 332],
3165 )
3166 self.assertEqual(
3167 [
3168 record.id
3169 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3170 .order_by("full_name")
3171 .limit(5)
3172 ],
3173 [73, 72, 71, 70, 65],
3174 )
3175 self.assertEqual(
3176 [
3177 data_id["detector"]
3178 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3179 .order_by("full_name")
3180 .limit(5)
3181 ],
3182 [73, 72, 71, 70, 65],
3183 )
3185 def test_long_query_names(self) -> None:
3186 """Test that queries involving very long names are handled correctly.
3188 This is especially important for PostgreSQL, which truncates identifiers
3189 longer than 63 bytes by default, but it's worth testing for all DBs.
3190 """
3191 registry = self.makeRegistry()
3192 name = "abcd" * 17
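# 68 characters, comfortably past PostgreSQL's default 63-byte
# identifier limit.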
3193 registry.registerDatasetType(
3194 DatasetType(
3195 name,
3196 dimensions=(),
3197 storageClass="Exposure",
3198 universe=registry.dimensions,
3199 )
3200 )
3201 # Need to search more than one collection actually containing a
3202 # matching dataset; otherwise an optimization makes findFirst=True a
3203 # no-op, which would sidestep any bugs caused by name truncation.
3204 run1 = "run1"
3205 registry.registerRun(run1)
3206 run2 = "run2"
3207 registry.registerRun(run2)
3208 (ref1,) = registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run1)
3209 registry.insertDatasets(name, [DataCoordinate.make_empty(registry.dimensions)], run2)
3210 self.assertEqual(
3211 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3212 {ref1},
3213 )
3215 def test_skypix_constraint_queries(self) -> None:
3216 """Test queries spatially constrained by a skypix data ID."""
3217 registry = self.makeRegistry()
3218 self.loadData(registry, "hsc-rc2-subset.yaml")
3219 patch_regions = {
3220 (data_id["tract"], data_id["patch"]): data_id.region
3221 for data_id in registry.queryDataIds(["patch"]).expanded()
3222 }
3223 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3224 # This check ensures the test doesn't become trivial due to a config
3225 # change; if it does, just pick a different HTM level.
3226 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3227 # Gather all skypix IDs that definitely overlap at least one of these
3228 # patches.
3229 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3230 for patch_region in patch_regions.values():
3231 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3232 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3233 # and does not overlap at least one other patch.
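# (The for/else below relies on Python's loop-else semantics: the else
# branch runs only if the loop finishes without a break.)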
3234 for skypix_id in itertools.chain.from_iterable(
3235 range(begin, end) for begin, end in relevant_skypix_ids
3236 ):
3237 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3238 overlapping_patches = {
3239 patch_key
3240 for patch_key, patch_region in patch_regions.items()
3241 if not patch_region.isDisjointFrom(skypix_region)
3242 }
3243 if overlapping_patches and overlapping_patches != patch_regions.keys():
3244 break
3245 else:
3246 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3247 self.assertEqual(
3248 {
3249 (data_id["tract"], data_id["patch"])
3250 for data_id in registry.queryDataIds(
3251 ["patch"],
3252 dataId={skypix_dimension.name: skypix_id},
3253 )
3254 },
3255 overlapping_patches,
3256 )
3257 # Test that a three-way join that includes the common skypix system in
3258 # the dimensions doesn't generate redundant join terms in the query.
3259 full_data_ids = set(
3260 registry.queryDataIds(
3261 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3262 ).expanded()
3263 )
3264 self.assertGreater(len(full_data_ids), 0)
3265 for data_id in full_data_ids:
3266 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3267 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3269 def test_spatial_constraint_queries(self) -> None:
3270 """Test queries in which one spatial dimension in the constraint (data
3271 ID or ``where`` string) constrains a different spatial dimension in the
3272 query result columns.
3273 """
3274 registry = self.makeRegistry()
3275 self.loadData(registry, "hsc-rc2-subset.yaml")
3276 patch_regions = {
3277 (data_id["tract"], data_id["patch"]): data_id.region
3278 for data_id in registry.queryDataIds(["patch"]).expanded()
3279 }
3280 observation_regions = {
3281 (data_id["visit"], data_id["detector"]): data_id.region
3282 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3283 }
3284 all_combos = {
3285 (patch_key, observation_key)
3286 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3287 }
3288 overlapping_combos = {
3289 (patch_key, observation_key)
3290 for patch_key, observation_key in all_combos
3291 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3292 }
3293 # Check a direct spatial join with no constraint first.
3294 self.assertEqual(
3295 {
3296 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3297 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3298 },
3299 overlapping_combos,
3300 )
3301 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3302 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3303 for patch_key, observation_key in overlapping_combos:
3304 overlaps_by_patch[patch_key].add(observation_key)
3305 overlaps_by_observation[observation_key].add(patch_key)
3306 # Find patches and observations that overlap at least one, but not
3307 # all, of the other kind.
3308 nontrivial_patch = next(
3309 iter(
3310 patch_key
3311 for patch_key, observation_keys in overlaps_by_patch.items()
3312 if observation_keys and observation_keys != observation_regions.keys()
3313 )
3314 )
3315 nontrivial_observation = next(
3316 iter(
3317 observation_key
3318 for observation_key, patch_keys in overlaps_by_observation.items()
3319 if patch_keys and patch_keys != patch_regions.keys()
3320 )
3321 )
3322 # Use the nontrivial patches and observations as constraints on the
3323 # other dimensions in various ways, first via a 'where' expression.
3324 # It's better in general to use 'bind' instead of f-strings, but these
3325 # are all integers, so there are no quoting concerns.
3326 self.assertEqual(
3327 {
3328 (data_id["visit"], data_id["detector"])
3329 for data_id in registry.queryDataIds(
3330 ["visit", "detector"],
3331 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3332 skymap="hsc_rings_v1",
3333 )
3334 },
3335 overlaps_by_patch[nontrivial_patch],
3336 )
3337 self.assertEqual(
3338 {
3339 (data_id["tract"], data_id["patch"])
3340 for data_id in registry.queryDataIds(
3341 ["patch"],
3342 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3343 instrument="HSC",
3344 )
3345 },
3346 overlaps_by_observation[nontrivial_observation],
3347 )
3348 # and then via the dataId argument.
3349 self.assertEqual(
3350 {
3351 (data_id["visit"], data_id["detector"])
3352 for data_id in registry.queryDataIds(
3353 ["visit", "detector"],
3354 dataId={
3355 "tract": nontrivial_patch[0],
3356 "patch": nontrivial_patch[1],
3357 },
3358 skymap="hsc_rings_v1",
3359 )
3360 },
3361 overlaps_by_patch[nontrivial_patch],
3362 )
3363 self.assertEqual(
3364 {
3365 (data_id["tract"], data_id["patch"])
3366 for data_id in registry.queryDataIds(
3367 ["patch"],
3368 dataId={
3369 "visit": nontrivial_observation[0],
3370 "detector": nontrivial_observation[1],
3371 },
3372 instrument="HSC",
3373 )
3374 },
3375 overlaps_by_observation[nontrivial_observation],
3376 )
3378 def test_query_projection_drop_postprocessing(self) -> None:
3379 """Test that projections and deduplications on query objects can
3380 drop post-query region filtering to ensure the query remains in
3381 the SQL engine.
3382 """
3383 registry = self.makeRegistry()
3384 self.loadData(registry, "base.yaml")
3385 self.loadData(registry, "spatial.yaml")
3387 def pop_transfer(tree: Relation) -> Relation:
3388 """If a relation tree terminates with a transfer to a new engine,
3389 return the relation prior to that transfer. If not, return the
3390 original relation.
3392 Parameters
3393 ----------
3394 tree : `Relation`
3395 The relation tree to inspect.
3396 """
3397 match tree:
3398 case Transfer(target=target):
3399 return target
3400 case _:
3401 return tree
3403 # There's no public way to get a Query object yet, so we get one from a
3404 # DataCoordinateQueryResults private attribute. When a public API is
3405 # available this test should use it.
3406 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3407 # We expect this query to terminate in the iteration engine originally,
3408 # because region-filtering is necessary.
3409 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3410 # If we deduplicate, we usually have to do that downstream of the
3411 # filtering. That means the deduplication has to happen in the
3412 # iteration engine.
3413 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3414 # If we pass drop_postprocessing, we instead drop the region filtering
3415 # so the deduplication can happen in SQL (though there might still be
3416 # transfer to iteration at the tail of the tree that we can ignore;
3417 # that's what the pop_transfer takes care of here).
3418 self.assertIsInstance(
3419 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3420 sql.Engine,
3421 )
3423 def test_query_find_datasets_drop_postprocessing(self) -> None:
3424 """Test that DataCoordinateQueryResults.findDatasets avoids commutator
3425 problems with the FindFirstDataset relation operation.
3426 """
3427 # Setup: load some visit, tract, and patch records, and insert two
3428 # datasets with dimensions {visit, patch}, with one in each of two
3429 # RUN collections.
3430 registry = self.makeRegistry()
3431 self.loadData(registry, "base.yaml")
3432 self.loadData(registry, "spatial.yaml")
3433 storage_class = StorageClass("Warpy")
3434 registry.storageClasses.registerStorageClass(storage_class)
3435 dataset_type = DatasetType(
3436 "warp", {"visit", "patch"}, storageClass=storage_class, universe=registry.dimensions
3437 )
3438 registry.registerDatasetType(dataset_type)
3439 (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
3440 registry.registerRun("run1")
3441 registry.registerRun("run2")
3442 (ref1,) = registry.insertDatasets(dataset_type, [data_id], run="run1")
3443 (ref2,) = registry.insertDatasets(dataset_type, [data_id], run="run2")
3444 # Query for the dataset using queryDataIds(...).findDatasets(...)
3445 # against only one of the two collections. This should work even
3446 # though the relation returned by queryDataIds ends with
3447 # iteration-engine region-filtering, because we can recognize before
3448 # running the query that there is only one collection to search and
3449 # hence the (default) findFirst=True is irrelevant, and joining in the
3450 # dataset query commutes past the iteration-engine postprocessing.
3451 query1 = registry.queryDataIds(
3452 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3453 )
3454 self.assertEqual(
3455 set(query1.findDatasets(dataset_type.name, collections=["run1"])),
3456 {ref1},
3457 )
3458 # Query for the dataset using queryDataIds(...).findDatasets(...)
3459 # against both collections. This can only work if the FindFirstDataset
3460 # operation can be commuted past the iteration-engine operations into SQL.
3461 query2 = registry.queryDataIds(
3462 {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
3463 )
3464 self.assertEqual(
3465 set(query2.findDatasets(dataset_type.name, collections=["run2", "run1"])),
3466 {ref2},
3467 )
3469 def test_query_empty_collections(self) -> None:
3470 """Test for registry query methods with empty collections. The methods
3471 should return empty result set (or None when applicable) and provide
3472 "doomed" diagnostics.
3473 """
3474 registry = self.makeRegistry()
3475 self.loadData(registry, "base.yaml")
3476 self.loadData(registry, "datasets.yaml")
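# Throughout this test, collections=... (Ellipsis) means "search all
# collections", while collections=[] is an explicitly empty search
# path.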
3478 # Tests for registry.findDataset()
3479 with self.assertRaises(NoDefaultCollectionError):
3480 registry.findDataset("bias", instrument="Cam1", detector=1)
3481 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3482 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3484 # Tests for registry.queryDatasets()
3485 with self.assertRaises(NoDefaultCollectionError):
3486 registry.queryDatasets("bias")
3487 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3489 result = registry.queryDatasets("bias", collections=[])
3490 self.assertEqual(len(list(result)), 0)
3491 messages = list(result.explain_no_results())
3492 self.assertTrue(messages)
3493 self.assertTrue(any("because collection list is empty" in message for message in messages))
3495 # Tests for registry.queryDataIds()
3496 with self.assertRaises(NoDefaultCollectionError):
3497 registry.queryDataIds("detector", datasets="bias")
3498 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3500 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3501 self.assertEqual(len(list(result)), 0)
3502 messages = list(result.explain_no_results())
3503 self.assertTrue(messages)
3504 self.assertTrue(any("because collection list is empty" in message for message in messages))
3506 # Tests for registry.queryDimensionRecords()
3507 with self.assertRaises(NoDefaultCollectionError):
3508 registry.queryDimensionRecords("detector", datasets="bias")
3509 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3511 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3512 self.assertEqual(len(list(result)), 0)
3513 messages = list(result.explain_no_results())
3514 self.assertTrue(messages)
3515 self.assertTrue(any("because collection list is empty" in message for message in messages))
3517 def test_dataset_followup_spatial_joins(self) -> None:
3518 """Test queryDataIds(...).findRelatedDatasets(...) where a spatial join
3519 is involved.
3520 """
3521 registry = self.makeRegistry()
3522 self.loadData(registry, "base.yaml")
3523 self.loadData(registry, "spatial.yaml")
3524 pvi_dataset_type = DatasetType(
3525 "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
3526 )
3527 registry.registerDatasetType(pvi_dataset_type)
3528 collection = "datasets"
3529 registry.registerRun(collection)
3530 (pvi1,) = registry.insertDatasets(
3531 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 1}], run=collection
3532 )
3533 (pvi2,) = registry.insertDatasets(
3534 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 2}], run=collection
3535 )
3536 (pvi3,) = registry.insertDatasets(
3537 pvi_dataset_type, [{"instrument": "Cam1", "visit": 1, "detector": 3}], run=collection
3538 )
3539 self.assertEqual(
3540 set(
3541 registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
3542 .expanded()
3543 .findRelatedDatasets("pvi", [collection])
3544 ),
3545 {
3546 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi1),
3547 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=0), pvi2),
3548 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=1), pvi2),
3549 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi1),
3550 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi2),
3551 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=2), pvi3),
3552 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=3), pvi2),
3553 (registry.expandDataId(skymap="SkyMap1", tract=0, patch=4), pvi3),
3554 },
3555 )