Coverage for python / lsst / daf / butler / registry / tests / _registry.py: 6%
1699 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 08:30 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-06 08:30 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27from __future__ import annotations
29from ... import ddl
31__all__ = ["RegistryTests"]
33import contextlib
34import datetime
35import itertools
36import re
37import time
38import unittest
39import uuid
40from abc import ABC, abstractmethod
41from collections import defaultdict, namedtuple
42from collections.abc import Callable, Iterator
43from concurrent.futures import ThreadPoolExecutor
44from contextlib import ExitStack
45from datetime import timedelta
46from threading import Barrier
47from typing import TypeVar
49import astropy.time
50import sqlalchemy
52try:
53 import numpy as np
54except ImportError:
55 np = None
57import lsst.sphgeom
59from ... import Butler
60from ..._collection_type import CollectionType
61from ..._dataset_association import DatasetAssociation
62from ..._dataset_ref import DatasetIdFactory, DatasetIdGenEnum, DatasetRef
63from ..._dataset_type import DatasetType
64from ..._exceptions import (
65 CalibrationLookupError,
66 CollectionTypeError,
67 DataIdValueError,
68 InconsistentDataIdError,
69 InvalidQueryError,
70 MissingCollectionError,
71 MissingDatasetTypeError,
72)
73from ..._exceptions_legacy import DatasetTypeError
74from ..._storage_class import StorageClass
75from ..._timespan import Timespan
76from ...dimensions import DataCoordinate, DataCoordinateSet, DimensionUniverse, SkyPixDimension
77from ...direct_butler import DirectButler
78from .._collection_summary import CollectionSummary
79from .._config import RegistryConfig
80from .._exceptions import (
81 ArgumentError,
82 CollectionError,
83 ConflictingDefinitionError,
84 DatasetTypeExpressionError,
85 NoDefaultCollectionError,
86 OrphanedRecordError,
87)
88from ..interfaces import ButlerAttributeExistsError, ReadOnlyDatabaseError
89from ..queries import ParentDatasetQueryResults
90from ..sql_registry import SqlRegistry
92_T = TypeVar("_T")
95class RegistryTests(ABC):
96 """Generic tests for the `SqlRegistry` class that can be subclassed to
97 generate tests for different configurations.
98 """
100 collectionsManager: str | None = None
101 """Name of the collections manager class, if subclass provides value for
102 this member then it overrides name specified in default configuration
103 (`str`).
104 """
106 datasetsManager: str | dict[str, str] | None = None
107 """Name or configuration dictionary of the datasets manager class, if
108 subclass provides value for this member then it overrides name specified
109 in default configuration (`str` or `dict`).
110 """
112 supportsCollectionRegex: bool = False
113 """True if the registry class being tested supports regex searches for
114 collections."""
def makeRegistryConfig(self) -> RegistryConfig:
    """Build the `RegistryConfig` used to create the registry under test.

    Subclasses are expected to call this while constructing their registry.
    The result starts from a default-constructed `RegistryConfig`; the
    ``collectionsManager`` and ``datasetsManager`` class members, when set
    to a truthy value, replace the corresponding manager entries.
    Subclasses that want a purely default configuration can instantiate
    `RegistryConfig` directly instead.

    Returns
    -------
    config : `RegistryConfig`
        Configuration with any class-level manager overrides applied.
    """
    config = RegistryConfig()
    # (manager kind, class-level override) pairs, applied in this order.
    overrides = (
        ("collections", self.collectionsManager),
        ("datasets", self.datasetsManager),
    )
    for manager_kind, override in overrides:
        if override:
            config["managers", manager_kind] = override
    return config
@abstractmethod
def make_butler(self, registry_config: RegistryConfig | None = None) -> Butler:
    """Construct the butler whose registry is exercised by these tests.

    Concrete test classes must override this method; the base
    implementation always raises.

    Parameters
    ----------
    registry_config : `RegistryConfig`, optional
        Registry configuration used when instantiating the Butler.

    Returns
    -------
    butler : `~lsst.daf.butler.Butler`
        The butler with a registry to be tested.

    Raises
    ------
    NotImplementedError
        Always raised by this base implementation.
    """
    raise NotImplementedError()
def load_data(self, butler: Butler, *filenames: str) -> None:
    """Import registry test data files into a butler.

    Each file is read from
    ``resource://lsst.daf.butler/tests/registry_data/<filename>`` and is
    expected to be a YAML import/export file. Files are imported one at a
    time, in the order given, without involving the datastore.

    Parameters
    ----------
    butler : `Butler`
        The butler to load into.
    *filenames : `str`
        The names of the files to load.
    """
    for name in filenames:
        uri = f"resource://lsst.daf.butler/tests/registry_data/{name}"
        butler.import_(filename=uri, without_datastore=True)
def checkQueryResults(self, results, expected):
    """Assert that a query results object holds exactly the expected items.

    Besides comparing the iterated contents (ignoring order), this also
    checks that ``results.count()`` matches the number of expected items
    and that ``results.any()`` is true exactly when items are expected.

    Parameters
    ----------
    results : `DataCoordinateQueryResults` or `DatasetQueryResults`
        A lazy-evaluation query results object.
    expected : `list`
        A list of `DataCoordinate` or `DatasetRef` objects that should be
        equal to results of the query, aside from ordering.
    """
    self.assertCountEqual(list(results), expected)
    self.assertEqual(results.count(), len(expected))
    # A non-empty expectation means any() must be truthy, and vice versa.
    check_any = self.assertTrue if expected else self.assertFalse
    check_any(results.any())
def testOpaque(self):
    """Exercise the opaque-table API: `SqlRegistry.registerOpaqueTable`,
    `SqlRegistry.insertOpaqueData`, `SqlRegistry.fetchOpaqueData`, and
    `SqlRegistry.deleteOpaqueData`.
    """
    butler = self.make_butler()
    registry = butler._registry
    table_name = "opaque_table_for_testing"
    columns = [
        ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
        ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
        ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
    ]
    registry.registerOpaqueTable(table_name, spec=ddl.TableSpec(fields=columns))

    def fetch(**where):
        # Materialize the rows matching the given column constraints.
        return list(registry.fetchOpaqueData(table_name, **where))

    row_one = {"id": 1, "name": "one", "count": None}
    row_two = {"id": 2, "name": "two", "count": 5}
    row_three = {"id": 3, "name": "three", "count": 6}
    all_rows = [row_one, row_two, row_three]
    registry.insertOpaqueData(table_name, *all_rows)

    # Unconstrained fetch, then single-column and multi-column constraints.
    self.assertCountEqual(all_rows, fetch())
    self.assertEqual([row_one], fetch(id=1))
    self.assertEqual([row_two], fetch(name="two"))
    self.assertEqual([row_one], fetch(id=(1, 3), name=("one", "two")))
    self.assertEqual(all_rows, fetch(id=(1, 2, 3)))
    # Test very long IN clause which exceeds sqlite limit on number of
    # parameters. SQLite says the limit is 32k but it looks like it is
    # much higher.
    self.assertEqual(all_rows, fetch(id=list(range(300_000))))
    # Two IN clauses, each longer than 1k batch size, first with
    # duplicates, second has matching elements in different batches (after
    # sorting).
    many_ids = list(range(1000)) + list(range(100, 0, -1))
    many_names = ["one"] + [f"q{i}" for i in range(2200)] + ["two"]
    self.assertEqual([row_one, row_two], fetch(id=many_ids, name=many_names))
    # Constraints that no single row satisfies yield nothing.
    self.assertEqual([], fetch(id=1, name="two"))

    # Delete one row by constraint, then everything that remains.
    registry.deleteOpaqueData(table_name, id=3)
    self.assertCountEqual([row_one, row_two], fetch())
    registry.deleteOpaqueData(table_name)
    self.assertEqual([], fetch())
def testDatasetType(self):
    """Tests for `SqlRegistry.registerDatasetType`,
    `SqlRegistry.getDatasetType`, and basic `queryDatasetTypes` usage.
    """
    butler = self.make_butler()
    registry = butler.registry
    # Check valid insert
    datasetTypeName = "test"
    storageClass = StorageClass("testDatasetType")
    registry.storageClasses.registerStorageClass(storageClass)
    dimensions = registry.dimensions.conform(("instrument", "visit"))
    differentDimensions = registry.dimensions.conform(("instrument", "patch"))
    inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    # Inserting for the first time should return True
    self.assertTrue(registry.registerDatasetType(inDatasetType))
    outDatasetType1 = registry.getDatasetType(datasetTypeName)
    self.assertEqual(outDatasetType1, inDatasetType)

    # Re-inserting should work
    self.assertFalse(registry.registerDatasetType(inDatasetType))
    # Except when they are not identical. The conflicting definition is
    # built outside the assertRaises block so that only the registration
    # call itself can satisfy the expected exception.
    nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
    with self.assertRaises(ConflictingDefinitionError):
        registry.registerDatasetType(nonIdenticalDatasetType)

    # Register a second dataset type with the same dimensions but a
    # different name and storage class.
    datasetTypeName = "testNoneTemplate"
    storageClass = StorageClass("testDatasetType2")
    registry.storageClasses.registerStorageClass(storageClass)
    inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    registry.registerDatasetType(inDatasetType)
    outDatasetType2 = registry.getDatasetType(datasetTypeName)
    self.assertEqual(outDatasetType2, inDatasetType)

    allTypes = set(registry.queryDatasetTypes())
    self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    # Test some basic queryDatasetTypes functionality
    missing: list[str] = []
    types = registry.queryDatasetTypes(["te*", "notarealdatasettype"], missing=missing)
    self.assertCountEqual([dt.name for dt in types], ["test", "testNoneTemplate"])
    self.assertEqual(missing, ["notarealdatasettype"])

    # Trying to register a dataset type with different universe version or
    # namespace will raise.
    wrong_universes = (DimensionUniverse(version=-1), DimensionUniverse(namespace="🔭"))
    # The storage class does not depend on the universe, so build it once.
    storageClass = StorageClass("testDatasetType")
    for universe in wrong_universes:
        dataset_type = DatasetType(
            "wrong_universe", ("instrument", "visit"), storageClass, universe=universe
        )
        with self.assertRaisesRegex(ValueError, "Incompatible dimension universe versions"):
            registry.registerDatasetType(dataset_type)
def testDatasetTypeCache(self):
    """Check that a butler picks up a dataset type registered through a
    different butler after its own dataset type cache was already loaded.
    """
    writer = self.make_butler()
    reader = writer.clone()
    self.load_data(writer, "base.yaml")

    # Trigger full cache load on the reading side.
    reader.get_dataset_type("flat")
    # Have an external process register a dataset type.
    new_type = DatasetType("test_type", ["instrument"], "int", universe=writer.dimensions)
    writer.registry.registerDatasetType(new_type)
    # The first pass is a cache miss that triggers a fetch of a single
    # dataset type; the second pass should be served from the cache. Both
    # must return the same definition.
    for _ in range(2):
        fetched = reader.get_dataset_type("test_type")
        self.assertEqual(fetched.name, "test_type")
        self.assertEqual(list(fetched.dimensions.names), ["instrument"])
    # Do a query that uses the dataset type's tags table.
    found = reader.query_datasets("test_type", collections="*", find_first=False, explain=False)
    self.assertEqual(found, [])
def testDimensions(self):
    """Exercise `SqlRegistry.insertDimensionData`,
    `SqlRegistry.syncDimensionData`, and `SqlRegistry.expandDataId`.
    """
    butler = self.make_butler()
    registry = butler.registry

    # --- instrument: plain insert, duplicate insert, expansion ---
    instrument_element = "instrument"
    instrument_dim = registry.dimensions[instrument_element]
    instrument_record = {
        "name": "DummyCam",
        "visit_max": 10,
        "visit_system": 0,
        "exposure_max": 10,
        "detector_max": 2,
        "class_name": "lsst.pipe.base.Instrument",
    }
    registry.insertDimensionData(instrument_element, instrument_record)
    # A second insert of the identical record must be rejected.
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.insertDimensionData(instrument_element, instrument_record)
    # Expanding the data ID should give back the record just inserted.
    expanded = registry.expandDataId(instrument="DummyCam", dimensions=instrument_dim.minimal_group)
    self.assertEqual(expanded.records[instrument_element].toDict(), instrument_record)
    # Expanding an ID with no matching record must raise.
    with self.assertRaises(DataIdValueError):
        registry.expandDataId({"instrument": "Unknown"}, dimensions=instrument_dim.minimal_group)
    # band doesn't have a table; insert should fail.
    with self.assertRaises(TypeError):
        registry.insertDimensionData("band", {"band": "i"})

    # --- physical_filter: required dependency handling ---
    filter_element = "physical_filter"
    filter_dim = registry.dimensions[filter_element]
    filter_record = {"name": "DummyCam_i", "band": "i"}
    # Without the required "instrument" value the insert must fail ...
    with self.assertRaises(KeyError):
        registry.insertDimensionData(filter_element, filter_record)
    # ... and supplying it makes the same insert succeed.
    filter_record["instrument"] = "DummyCam"
    registry.insertDimensionData(filter_element, filter_record)
    expanded = registry.expandDataId(
        instrument="DummyCam", physical_filter="DummyCam_i", dimensions=filter_dim.minimal_group
    )
    self.assertEqual(expanded.records[filter_element].toDict(), filter_record)

    # --- detector: syncDimensionData semantics ---
    detector_element = "detector"
    detector_record = {
        "instrument": "DummyCam",
        "id": 1,
        "full_name": "one",
        "name_in_raft": "zero",
        "purpose": "SCIENCE",
    }
    # First sync inserts a new record.
    self.assertTrue(registry.syncDimensionData(detector_element, detector_record))
    # Sync that again. Note that one field ("raft") is NULL, and that
    # should be okay.
    self.assertFalse(registry.syncDimensionData(detector_element, detector_record))
    # Same primary key but a different value for one field must conflict.
    conflicting_record = {
        "instrument": "DummyCam",
        "id": 1,
        "full_name": "one",
        "name_in_raft": "four",
        "purpose": "SCIENCE",
    }
    with self.assertRaises(ConflictingDefinitionError):
        registry.syncDimensionData(detector_element, conflicting_record)
@unittest.skipIf(np is None, "numpy not available.")
def testNumpyDataId(self):
    """Check that a numpy integer is accepted as a data ID value and comes
    back from `expandDataId` as a plain `int`.
    """
    butler = self.make_butler()
    registry = butler.registry
    records = (
        ("instrument", {"instrument": "DummyCam"}),
        ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
        ("day_obs", {"instrument": "DummyCam", "id": 20250101}),
        # Using an np.int64 here fails unless Records.fromDict is also
        # patched to look for numbers.Integral
        (
            "visit",
            {
                "instrument": "DummyCam",
                "id": 42,
                "name": "fortytwo",
                "physical_filter": "d-r",
                "day_obs": 20250101,
            },
        ),
    )
    for element, record in records:
        registry.insertDimensionData(element, record)

    # Try a normal integer and something that looks like an int but
    # is not.
    for visit_id in (42, np.int64(42)):
        with self.subTest(visit_id=repr(visit_id), id_type=type(visit_id).__name__):
            data_id = {"instrument": "DummyCam", "visit": visit_id}
            expanded = registry.expandDataId(data_id)
            visit_value = expanded["visit"]
            self.assertEqual(visit_value, int(visit_id))
            self.assertIsInstance(visit_value, int)
def testDataIdRelationships(self):
    """Check that `SqlRegistry.expandDataId` raises when the keys of the
    given data ID are inconsistent with each other.
    """
    butler = self.make_butler()
    self.load_data(butler, "base.yaml")
    registry = butler.registry
    # Extra dimension records, inserted strictly in this order: visit 1 is
    # defined (via visit_definition) to contain exposure 1 only.
    extra_records = (
        ("day_obs", {"instrument": "Cam1", "id": 20250101}),
        ("group", {"instrument": "Cam1", "name": "group1"}),
        (
            "exposure",
            {
                "instrument": "Cam1",
                "id": 1,
                "obs_id": "one",
                "physical_filter": "Cam1-G",
                "group": "group1",
                "day_obs": 20250101,
            },
        ),
        ("group", {"instrument": "Cam1", "name": "group2"}),
        (
            "exposure",
            {
                "instrument": "Cam1",
                "id": 2,
                "obs_id": "two",
                "physical_filter": "Cam1-G",
                "group": "group2",
                "day_obs": 20250101,
            },
        ),
        ("visit_system", {"instrument": "Cam1", "id": 0, "name": "one-to-one"}),
        (
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "day_obs": 20250101},
        ),
        ("visit_definition", {"instrument": "Cam1", "visit": 1, "exposure": 1}),
    )
    for element, record in extra_records:
        registry.insertDimensionData(element, record)
    # Exposure 2 exists but is not part of visit 1, so this combination
    # must be rejected.
    mismatched = {"instrument": "Cam1", "visit": 1, "exposure": 2}
    with self.assertRaises(InconsistentDataIdError):
        registry.expandDataId(mismatched)
def testDataset(self):
    """Basic round trip through `SqlRegistry.insertDatasets`,
    `SqlRegistry.getDataset`, and `SqlRegistry.removeDatasets`.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    # Deliberately non-ASCII run name.
    run_name = "tésτ"
    registry.registerRun(run_name)
    bias_type = registry.getDatasetType("bias")
    data_id = {"instrument": "Cam1", "detector": 2}
    inserted = registry.insertDatasets(bias_type, dataIds=[data_id], run=run_name)
    (ref,) = inserted
    fetched = registry.getDataset(ref.id)
    self.assertIsNotNone(ref.id)
    self.assertEqual(ref, fetched)
    # Inserting the same dataset type / data ID / run again must conflict.
    with self.assertRaises(ConflictingDefinitionError):
        registry.insertDatasets(bias_type, dataIds=[data_id], run=run_name)
    # After removal the dataset can no longer be found in the run.
    registry.removeDatasets([ref])
    lookup = registry.findDataset(bias_type, data_id, collections=[run_name])
    self.assertIsNone(lookup)
def test_get_many_datasets(self):
    """Check `Butler.get_many_datasets` against datasets loaded from the
    ``base.yaml`` and ``datasets.yaml`` test data files.
    """
    butler = self.make_butler()
    self.load_data(butler, "base.yaml", "datasets.yaml")
    refs_by_id = {}
    for ref in butler.query_all_datasets(["imported_g", "imported_r"], find_first=False):
        refs_by_id[str(ref.id)] = ref

    # Dataset IDs used throughout; the last one is used as an ID that is
    # expected to be absent from the repository.
    id_a = "60c8a65c-7290-4c38-b1de-e3b1cdcf872d"
    id_b = "d0bb04cd-d697-4a83-ba53-cdfcd58e3a0c"
    id_c = "87f3e68d-258d-41b7-8ea5-edf3557ccb30"
    id_absent = "238c3b83-f6e5-4ccb-a7b0-5028dec1dcbb"

    # Set up a tagged collection containing a dataset used by the tests
    # below. get_many_datasets() queries on tables shared between run
    # collections and tagged collections, so this makes sure the tags don't
    # interfere.
    butler.collections.register("tagged", CollectionType.TAGGED)
    butler.registry.associate("tagged", [refs_by_id[id_a]])

    # Empty input returns empty output.
    self.assertEqual(butler.get_many_datasets([]), [])
    # Datasets all of one type, but in different collections.
    self.assertCountEqual(
        butler.get_many_datasets([id_a, id_b]),
        [refs_by_id[id_a], refs_by_id[id_b]],
    )
    # Datasets of multiple types with different dimension groups.
    self.assertCountEqual(
        butler.get_many_datasets([id_a, id_b, id_c]),
        [refs_by_id[id_a], refs_by_id[id_b], refs_by_id[id_c]],
    )
    # Missing datasets are omitted from the result.
    self.assertCountEqual(
        butler.get_many_datasets([id_absent, id_a]),
        [refs_by_id[id_a]],
    )
    # Duplicates are squashed in the result.
    self.assertCountEqual(
        butler.get_many_datasets([id_a, id_a]),
        [refs_by_id[id_a]],
    )
    # Can use UUID instances as inputs instead of strings.
    self.assertCountEqual(
        butler.get_many_datasets([uuid.UUID(id_a)]),
        [refs_by_id[id_a]],
    )
    # Bad ID format raises ValueError.
    with self.assertRaises(ValueError):
        butler.get_many_datasets(["not-a-valid-uuid"])
    # Works with arbitrary iterables as input.
    self.assertCountEqual(
        butler.get_many_datasets(itertools.chain([id_a, id_b])),
        [refs_by_id[id_a], refs_by_id[id_b]],
    )
def test_fetch_run_dataset_ids(self):
    """Check that ``_fetch_run_dataset_ids`` returns the IDs of the datasets
    in the ``imported_r`` run of the test data.
    """
    butler = self.make_butler()
    registry = butler._registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    run_ids = registry._fetch_run_dataset_ids("imported_r")
    self.assertEqual(len(run_ids), 7)
    # The IDs must agree with what a general dataset query reports.
    queried_refs = butler.query_all_datasets("imported_r")
    expected_ids = [ref.id for ref in queried_refs]
    self.assertCountEqual(run_ids, expected_ids)
def testFindDataset(self):
    """Tests for `SqlRegistry.findDataset`.

    The checks run in sequence against one registry and build on each
    other's state: basic lookups in a single run, searches across several
    collections in different orders, searches involving a CALIBRATION
    collection with and without a timespan, a non-calibration dataset
    type, and a dataset type whose dimensions include an implied
    dimension.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    run = "tésτ"
    datasetType = registry.getDatasetType("bias")
    dataId = {"instrument": "Cam1", "detector": 4}
    registry.registerRun(run)
    # Insert one dataset and make sure it can be found again.
    (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
    outputRef = registry.findDataset(datasetType, dataId, collections=[run])
    self.assertEqual(outputRef, inputRef)
    # Check that retrieval with invalid dataId raises
    with self.assertRaises(LookupError):
        dataId = {"instrument": "Cam1"}  # no detector
        registry.findDataset(datasetType, dataId, collections=run)
    # Check that different dataIds match to different datasets
    dataId1 = {"instrument": "Cam1", "detector": 1}
    (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
    dataId2 = {"instrument": "Cam1", "detector": 2}
    (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
    self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
    self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
    # Check that requesting a non-existing dataId returns None
    nonExistingDataId = {"instrument": "Cam1", "detector": 3}
    self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
    # Search more than one collection, in which two have the right
    # dataset type and another does not.
    registry.registerRun("empty")
    self.load_data(butler, "datasets.yaml")
    bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
    self.assertIsNotNone(bias1)
    bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
    self.assertIsNotNone(bias2)
    # The expected result follows the order of the collection list: the
    # dataset from whichever of imported_g / imported_r is listed first.
    self.assertEqual(
        bias1,
        registry.findDataset(
            "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
        ),
    )
    self.assertEqual(
        bias2,
        registry.findDataset(
            "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
        ),
    )
    # If the input data ID was an expanded DataCoordinate with records,
    # then the output ref has records, too.
    expanded_id = registry.expandDataId({"instrument": "Cam1", "detector": 2})
    expanded_ref = registry.findDataset("bias", expanded_id, collections=["imported_r"])
    self.assertTrue(expanded_ref.dataId.hasRecords())
    # Search more than one collection, with one of them a CALIBRATION
    # collection.
    registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
    timespan = Timespan(
        begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
        end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
    )
    # bias2 is the dataset certified into the calibration collection for
    # this timespan.
    registry.certify("Cam1/calib", [bias2], timespan=timespan)
    self.assertEqual(
        bias1,
        registry.findDataset(
            "bias",
            instrument="Cam1",
            detector=2,
            collections=["empty", "imported_g", "Cam1/calib"],
            timespan=timespan,
        ),
    )
    self.assertEqual(
        bias1,
        registry.findDataset(
            "bias",
            instrument="Cam1",
            detector=2,
            # Calibration dataset type, with no calibration collection, but
            # a timespan was provided.
            collections=["imported_g"],
            timespan=timespan,
        ),
    )
    # With the calibration collection listed before imported_g, the
    # certified dataset (bias2) is the expected match.
    self.assertEqual(
        bias2,
        registry.findDataset(
            "bias",
            instrument="Cam1",
            detector=2,
            collections=["empty", "Cam1/calib", "imported_g"],
            timespan=timespan,
        ),
    )
    # If we try to search those same collections without a timespan, it
    # should still work, since the CALIBRATION collection is ignored.
    self.assertEqual(
        bias1,
        registry.findDataset(
            "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
        ),
    )
    self.assertEqual(
        bias1,
        registry.findDataset(
            "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
        ),
    )
    # Searching only the calibration collection without a timespan is
    # expected to find nothing.
    self.assertIsNone(
        registry.findDataset("bias", instrument="Cam1", detector=2, collections=["Cam1/calib"])
    )
    # Test non-calibration dataset type.
    registry.registerDatasetType(
        DatasetType("noncalibration", ["instrument", "detector"], "int", universe=butler.dimensions)
    )
    (non_calibration_ref,) = registry.insertDatasets("noncalibration", dataIds=[dataId2], run=run)
    self.assertIsNone(
        registry.findDataset("noncalibration", instrument="Cam1", detector=2, collections=["imported_g"])
    )
    self.assertEqual(
        non_calibration_ref,
        registry.findDataset("noncalibration", instrument="Cam1", detector=2, collections=[run]),
    )
    # Timespan parameter is ignored for non-calibration dataset types.
    self.assertIsNone(
        registry.findDataset(
            "noncalibration", instrument="Cam1", detector=2, collections=["imported_g"], timespan=timespan
        )
    )
    self.assertEqual(
        non_calibration_ref,
        registry.findDataset(
            "noncalibration", instrument="Cam1", detector=2, collections=[run], timespan=timespan
        ),
    )
    self.assertEqual(
        non_calibration_ref,
        registry.findDataset(
            "noncalibration",
            instrument="Cam1",
            detector=2,
            collections=["Cam1/calib", run],
            timespan=timespan,
        ),
    )
    # Add a dataset type whose dimension group involves an "implied"
    # dimension. ("physical_filter" implies "band".)
    registry.registerDatasetType(
        DatasetType(
            "dt_with_implied",
            [
                "instrument",
                "physical_filter",
            ],
            "int",
            universe=butler.dimensions,
        )
    )
    data_id = {"instrument": "Cam1", "physical_filter": "Cam1-G"}
    (implied_ref,) = registry.insertDatasets("dt_with_implied", dataIds=[data_id], run=run)
    found_ref = registry.findDataset("dt_with_implied", data_id, collections=[run])
    self.assertEqual(implied_ref, found_ref)
    # The "full" data ID with implied values is looked up, even though we
    # provided only the "required" values.
    self.assertTrue(found_ref.dataId.hasFull())
    # The search ignores excess data ID values beyond the 'required' set.
    # This is not the correct band value for this physical_filter, but
    # the mismatch is ignored.
    self.assertEqual(
        implied_ref,
        registry.findDataset(
            "dt_with_implied",
            {"instrument": "Cam1", "physical_filter": "Cam1-G", "band": "r"},
            collections=[run],
        ),
    )
    # Correct band value, wrong physical_filter.
    self.assertIsNone(
        registry.findDataset(
            "dt_with_implied",
            {"instrument": "Cam1", "physical_filter": "Cam1-R1", "band": "g"},
            collections=[run],
        ),
    )
def testRemoveDatasetTypeSuccess(self):
    """Check that `SqlRegistry.removeDatasetType` succeeds for a dataset
    type that has no datasets, after which the type can no longer be
    retrieved.
    """
    butler = self.make_butler()
    registry = butler.registry
    # Only base.yaml is loaded here, i.e. no datasets.yaml import.
    self.load_data(butler, "base.yaml")
    removed_name = "flat"
    registry.removeDatasetType(removed_name)
    with self.assertRaises(MissingDatasetTypeError):
        registry.getDatasetType(removed_name)
def testRemoveDatasetTypeFailure(self):
    """Check that `SqlRegistry.removeDatasetType` raises both when datasets
    of that type are present and when the name refers to a component
    dataset type.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    parent_name = "flat"
    # Datasets of this type exist once datasets.yaml has been loaded.
    with self.assertRaises(OrphanedRecordError):
        registry.removeDatasetType(parent_name)
    # Removing by component name is rejected as well.
    with self.assertRaises(DatasetTypeError):
        registry.removeDatasetType(DatasetType.nameWithComponent(parent_name, "image"))
def testImportDatasetsUUID(self):
    """Exercise `SqlRegistry._importDatasets` with UUID dataset IDs.

    The test is skipped when the class-level ``datasetsManager`` names a
    manager class other than ``ByDimensionsDatasetRecordStorageManagerUUID``.
    """
    expected_suffix = ".ByDimensionsDatasetRecordStorageManagerUUID"
    manager = self.datasetsManager
    # Resolve the configured manager class name, if one is configured.
    if isinstance(manager, str):
        manager_cls = manager
    elif isinstance(manager, dict):
        manager_cls = manager["cls"]
    else:
        manager_cls = None
    if manager_cls is not None and not manager_cls.endswith(expected_suffix):
        self.skipTest(f"Unexpected dataset manager {manager_cls}")

    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    for index in range(6):
        registry.registerRun(f"run{index}")
    bias_type = registry.getDatasetType("bias")
    flat_type = registry.getDatasetType("flat")
    bias_id_1 = {"instrument": "Cam1", "detector": 1}
    bias_id_2 = {"instrument": "Cam1", "detector": 2}
    flat_id_1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

    original = DatasetRef(bias_type, bias_id_1, run="run0")
    (imported,) = registry._importDatasets([original], assume_new=True)
    # UUID is used without change
    self.assertEqual(original.id, imported.id)

    # Inserting this ref with assume_new=True should fail, since this
    # dataset exists.
    with self.assertRaises(ConflictingDefinitionError):
        registry._importDatasets([original], assume_new=True)

    # All different failure modes
    conflicting_refs = (
        # Importing same DatasetRef with different dataset ID is an error
        DatasetRef(bias_type, bias_id_1, run="run0"),
        # Same DatasetId but different DataId
        DatasetRef(bias_type, bias_id_2, id=imported.id, run="run0"),
        DatasetRef(flat_type, flat_id_1, id=imported.id, run="run0"),
        # Same DatasetRef and DatasetId but different run
        DatasetRef(bias_type, bias_id_1, id=imported.id, run="run1"),
    )
    for bad_ref in conflicting_refs:
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([bad_ref])

    # Test for non-unique IDs, they can be re-imported multiple times.
    mode_cases = (
        (2, DatasetIdGenEnum.DATAID_TYPE),
        (4, DatasetIdGenEnum.DATAID_TYPE_RUN),
    )
    for run_index, id_mode in mode_cases:
        with self.subTest(idGenMode=repr(id_mode)):
            run_name = f"run{run_index}"
            next_run = f"run{run_index + 1}"
            # Make dataset ref with reproducible dataset ID.
            reproducible = DatasetRef(bias_type, bias_id_1, run=run_name, id_generation_mode=id_mode)
            (first,) = registry._importDatasets([reproducible])
            self.assertIsInstance(first.id, uuid.UUID)
            self.assertEqual(first.id.version, 5)
            self.assertEqual(first.id, reproducible.id)

            # Importing it again is OK
            (again,) = registry._importDatasets([first])
            self.assertEqual(again.id, first.id)

            # Cannot import to different run with the same ID
            moved = DatasetRef(bias_type, bias_id_1, id=first.id, run=next_run)
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([moved])

            regenerated = DatasetRef(bias_type, bias_id_1, run=next_run, id_generation_mode=id_mode)
            if id_mode is DatasetIdGenEnum.DATAID_TYPE:
                # Cannot import same DATAID_TYPE ref into a new run
                with self.assertRaises(ConflictingDefinitionError):
                    (_,) = registry._importDatasets([regenerated])
            else:
                # DATAID_TYPE_RUN ref can be imported into a new run
                (_,) = registry._importDatasets([regenerated])
def testComponentLookups(self):
    """Check that component datasets are not found through the registry.

    The component dataset type can still be retrieved, but searching for
    a component dataset is expected to fail.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # The child dataset type still exists in the Registry; it has to
    # agree with what DatasetRef.makeComponentRef produces.
    collection = "imported_g"
    parent_type = registry.getDatasetType("bias")
    child_type = registry.getDatasetType("bias.wcs")
    parent_ref = registry.findDataset(parent_type, collections=collection, instrument="Cam1", detector=1)
    self.assertIsInstance(parent_ref, DatasetRef)
    component_type = parent_ref.makeComponentRef("wcs").datasetType
    self.assertEqual(child_type, component_type)

    # Looking up a single component dataset with findDataset is an error.
    with self.assertRaises(DatasetTypeError):
        registry.findDataset("bias.wcs", collections=collection, dataId=parent_ref.dataId)
def testCollections(self):
    """Tests for registry methods that manage collections.

    Walks through collection documentation, TAGGED association and
    disassociation, CHAINED collections (including nested chains and
    flattening), collection queries by type, regex and glob, and finally
    collection removal.
    """
    butler = self.make_butler()
    registry = butler.registry
    # Second registry obtained from a clone of the butler; used further
    # down to check that chain changes are visible after a refresh().
    other_registry = butler.clone().registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    run1 = "imported_g"
    run2 = "imported_r"
    # Test setting a collection docstring after it has been created.
    registry.setCollectionDocumentation(run1, "doc for run1")
    self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
    registry.setCollectionDocumentation(run1, None)
    self.assertIsNone(registry.getCollectionDocumentation(run1))
    datasetType = "bias"
    # Find some datasets via their run's collection.
    dataId1 = {"instrument": "Cam1", "detector": 1}
    ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
    self.assertIsNotNone(ref1)
    dataId2 = {"instrument": "Cam1", "detector": 2}
    ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
    self.assertIsNotNone(ref2)
    # Associate those into a new collection, then look for them there.
    tag1 = "tag1"
    registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
    # Check that we can query for old and new collections by type.
    self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
    self.assertEqual(
        set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
        {tag1, run1, run2},
    )
    self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
    registry.associate(tag1, [ref1, ref2])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Disassociate one and verify that we can't find it there anymore...
    registry.disassociate(tag1, [ref1])
    self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
    # ...but we can still find ref2 in tag1, and ref1 in the run.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    collections = set(registry.queryCollections())
    self.assertEqual(collections, {run1, run2, tag1})
    # Associate both refs into tag1 again; ref2 is already there, but that
    # should be a harmless no-op.
    registry.associate(tag1, [ref1, ref2])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Get a different dataset (from a different run) that has the same
    # dataset type and data ID as ref2.
    ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
    self.assertNotEqual(ref2, ref2b)
    # Attempting to associate that into tag1 should be an error.
    with self.assertRaises(ConflictingDefinitionError):
        registry.associate(tag1, [ref2b])
    # That error shouldn't have messed up what we had before.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    # Attempt to associate the conflicting dataset again, this time with
    # a dataset that isn't in the collection and won't cause a conflict.
    # Should also fail without modifying anything.
    dataId3 = {"instrument": "Cam1", "detector": 3}
    ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
    with self.assertRaises(ConflictingDefinitionError):
        registry.associate(tag1, [ref3, ref2b])
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
    self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
    # Register a chained collection that searches [tag1, run2]
    chain1 = "chain1"
    registry.registerCollection(chain1, type=CollectionType.CHAINED)
    self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
    # Chained collection exists, but has no collections in it.
    self.assertFalse(registry.getCollectionChain(chain1))
    # If we query for all collections, we should get the chained collection
    # if we don't ask to flatten it (i.e. yield only its children) or if we
    # explicitly ask to include it too.
    self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
    self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
    self.assertEqual(
        set(registry.queryCollections(flattenChains=True, includeChains=True)), {tag1, run1, run2, chain1}
    )
    # Attempt to set its child collections to something circular; that
    # should fail.
    with self.assertRaises(ValueError):
        registry.setCollectionChain(chain1, [tag1, chain1])
    # Add the child collections.
    registry.setCollectionChain(chain1, [tag1, run2])
    self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
    self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
    self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
    # Refresh the other registry that points to the same repo, and make
    # sure it can see the things we've done (note that this does require
    # an explicit refresh(); that's the documented behavior, because
    # caching is ~impossible otherwise).
    if other_registry is not None:
        other_registry.refresh()
        self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
    # Searching for dataId1 or dataId2 in the chain should return ref1 and
    # ref2, because both are in tag1.
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
    # Now disassociate ref2 from tag1. The search (for bias) with
    # dataId2 in chain1 should then:
    # 1. not find it in tag1
    # 2. find a different dataset in run2
    registry.disassociate(tag1, [ref2])
    ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
    self.assertNotEqual(ref2b, ref2)
    self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
    # Define a new chain so we can test recursive chains.
    chain2 = "chain2"
    registry.registerCollection(chain2, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain2, [run2, chain1])
    self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
    self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})

    if self.supportsCollectionRegex:
        # Query for collections matching a regex. The test expects a
        # FutureWarning from each regex-based query.
        with self.assertWarns(FutureWarning):
            self.assertCountEqual(
                list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
                ["imported_r", "imported_g"],
            )
        # Query for collections matching a regex or an explicit str.
        with self.assertWarns(FutureWarning):
            self.assertCountEqual(
                list(
                    registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)
                ),
                ["imported_r", "imported_g", "chain1"],
            )
    # Same queries as the regex ones above, but using globs instead of
    # regex.
    self.assertCountEqual(
        list(registry.queryCollections("imported_*", flattenChains=False)),
        ["imported_r", "imported_g"],
    )
    # Query for collections matching a glob or an explicit str.
    self.assertCountEqual(
        list(registry.queryCollections(["imported_*", "chain1"], flattenChains=False)),
        ["imported_r", "imported_g", "chain1"],
    )
    # Query for collections matching chain names; with flattening it
    # should only return non-chain names.
    self.assertCountEqual(list(registry.queryCollections("chain?", flattenChains=True)), [tag1, run2])
    # Query for collections matching chain names; with flattening and
    # asking to include chains it should return everything.
    self.assertCountEqual(
        list(registry.queryCollections("chain*", flattenChains=True, includeChains=True)),
        [tag1, run2, chain1, chain2],
    )
    # Order of children in chained collections is preserved.
    self.assertEqual(list(registry.queryCollections("chain1", flattenChains=True)), [tag1, run2])
    self.assertEqual(list(registry.queryCollections("cha*2", flattenChains=True)), [run2, tag1])
    self.assertEqual(
        list(registry.queryCollections("chain1", flattenChains=True, includeChains=True)),
        [chain1, tag1, run2],
    )
    self.assertEqual(
        list(registry.queryCollections("chain2", flattenChains=True, includeChains=True)),
        [chain2, run2, chain1, tag1],
    )

    # Search for bias with dataId1 should find it via tag1 in chain2,
    # recursing, because it is not in run2.
    self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
    self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
    # Search for bias with dataId2 should find it in run2 (ref2b).
    self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
    # Search for a flat that is in run2 directly, then check that
    # searching chain2 (which contains run2) returns the same dataset.
    dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
    ref4 = registry.findDataset("flat", dataId4, collections=run2)
    self.assertIsNotNone(ref4)
    self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
    # Deleting a collection that's part of a CHAINED collection is not
    # allowed, and is exception-safe.
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.removeCollection(run2)
    self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
    with self.assertRaises(sqlalchemy.exc.IntegrityError):
        registry.removeCollection(chain1)
    self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
    # Actually remove chain2, test that it's gone by asking for its type.
    registry.removeCollection(chain2)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(chain2)
    # Actually remove run2 and chain1, which should work now.
    registry.removeCollection(chain1)
    registry.removeCollection(run2)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(run2)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(chain1)
    # Remove tag1 as well, just to test that we can remove TAGGED
    # collections.
    registry.removeCollection(tag1)
    with self.assertRaises(MissingCollectionError):
        registry.getCollectionType(tag1)
def test_collection_clearing(self) -> None:
    """Check that TAGGED and CALIBRATION collections can be removed
    while they still hold datasets, without affecting a sibling
    collection that holds the same datasets.
    """
    butler = self.make_butler()
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # The query returns datasets of two different types that share a
    # dimension group.
    found = butler.query_all_datasets("imported_r", instrument="Cam1", detector=2)
    original_datasets = tuple(found)
    self.assertEqual(len(original_datasets), 2)

    # Tagged collections: fill two, remove the first.
    tagged_names = ("tag1", "tag2")
    for name in tagged_names:
        butler.collections.register(name, CollectionType.TAGGED)
    for name in tagged_names:
        butler.registry.associate(name, original_datasets)
    butler.collections.x_remove("tag1")
    with self.assertRaises(MissingCollectionError):
        butler.collections.get_info("tag1")
    # No collateral damage: tag2 must still hold everything.
    still_tagged = set(butler.query_all_datasets("tag2"))
    self.assertEqual(still_tagged, set(original_datasets))

    # Calibration collections: same procedure, with certify.
    calib_names = ("calib1", "calib2")
    for name in calib_names:
        butler.collections.register(name, CollectionType.CALIBRATION)
    for name in calib_names:
        butler.registry.certify(name, original_datasets, Timespan(None, None))
    butler.collections.x_remove("calib1")
    with self.assertRaises(MissingCollectionError):
        butler.collections.get_info("calib1")
    # No collateral damage: calib2 must still hold everything.
    still_certified = set(butler.query_all_datasets("calib2"))
    self.assertEqual(still_certified, set(original_datasets))
def testCollectionChainCaching(self):
    """Check that a collection chain can be modified inside a caching
    context after the chain has been registered there.
    """
    registry = self.make_butler().registry
    child_name = "a"
    chain_name = "chain"
    with registry.caching_context():
        registry.registerCollection(child_name)
        registry.registerCollection(chain_name, CollectionType.CHAINED)
        # Regression check for DM-43750: modifying the chain of a
        # collection that was already cached used to raise.
        registry.setCollectionChain(chain_name, [child_name])
        children = list(registry.getCollectionChain(chain_name))
        self.assertEqual(children, ["a"])
def testCollectionChainFlatten(self):
    """Check the 'flatten' option of `SqlRegistry.setCollectionChain`."""
    registry = self.make_butler().registry

    # Build inner -> innermost, then point outer at inner.
    registry.registerCollection("inner", CollectionType.CHAINED)
    registry.registerCollection("innermost", CollectionType.RUN)
    registry.setCollectionChain("inner", ["innermost"])
    registry.registerCollection("outer", CollectionType.CHAINED)

    # Without flattening, the nested chain is kept as a child.
    registry.setCollectionChain("outer", ["inner"], flatten=False)
    unflattened = list(registry.getCollectionChain("outer"))
    self.assertEqual(unflattened, ["inner"])

    # With flattening, the nested chain is replaced by its own child.
    registry.setCollectionChain("outer", ["inner"], flatten=True)
    flattened = list(registry.getCollectionChain("outer"))
    self.assertEqual(flattened, ["innermost"])
def testCollectionChainPrependConcurrency(self):
    """Check that database row locking serializes two concurrent
    prepends to the same collection chain.
    """

    def prepend_a(butler: Butler):
        # Runs as the blocked thread: it is paused once positions for
        # the new children have been chosen, before they are inserted.
        butler.collections.prepend_chain("chain", ["a"])

    def prepend_b(butler: Butler):
        butler.collections.prepend_chain("chain", ["b"])

    registry = self._do_collection_concurrency_test(prepend_a, prepend_b)

    # The blocked thread should finish first and insert "a"; the other
    # thread should finish second and put "b" in front of it.
    self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))
def testCollectionChainReplaceConcurrency(self):
    """Check that database row locking serializes two concurrent
    redefinitions of the same collection chain.
    """

    def redefine_as_a(butler: Butler):
        # Runs as the blocked thread: it is paused after the old
        # children are deleted, before the new ones are inserted.
        butler.collections.redefine_chain("chain", ["a"])

    def redefine_as_b(butler: Butler):
        butler.collections.redefine_chain("chain", ["b"])

    registry = self._do_collection_concurrency_test(redefine_as_a, redefine_as_b)

    # The blocked thread should finish first; the other thread should
    # finish second and overwrite the chain with "b".
    self.assertEqual(("b",), registry.getCollectionChain("chain"))
def testCollectionChainRemoveConcurrency(self):
    """Check the outcome of a chain removal running concurrently with a
    redefinition of the same collection chain.
    """

    def remove_b(butler: Butler):
        # Runs as the blocked thread: it is paused after taking the
        # lock, before the children are deleted.
        butler.collections.remove_from_chain("chain", ["b"])

    def redefine_as_b_a(butler: Butler):
        butler.collections.redefine_chain("chain", ["b", "a"])

    registry = self._do_collection_concurrency_test(remove_b, redefine_as_b_a)

    # The blocked thread should finish first, removing "b"; the other
    # thread should finish second and put "b" back.
    self.assertEqual(("b", "a"), registry.getCollectionChain("chain"))
def _do_collection_concurrency_test(
    self, blocked_thread_func: Callable[[Butler], None], unblocked_thread_func: Callable[[Butler], None]
) -> SqlRegistry:
    """Run two collection operations concurrently against one database,
    holding the first inside its critical section while the second
    starts.

    Parameters
    ----------
    blocked_thread_func : `~collections.abc.Callable`
        Called with the first butler in a background thread. The
        collections manager of that butler's registry is given a hook
        that pauses it until the second function has been started.
    unblocked_thread_func : `~collections.abc.Callable`
        Called with a clone of the first butler in a second background
        thread, once the first function is known to be paused.

    Returns
    -------
    registry : `SqlRegistry`
        Registry of the first butler, for inspecting the final state.

    Raises
    ------
    unittest.SkipTest
        Raised if the string form of the second registry's database
        contains ``:memory:``.
    """
    # This function:
    # 1. Sets up two registries pointing at the same database.
    # 2. Start running 'blocked_thread_func' in a background thread,
    #    arranging for it to become blocked during a critical section in
    #    the collections manager.
    # 3. Wait for 'blocked_thread_func' to reach the critical section
    # 4. Start running 'unblocked_thread_func'.
    # 5. Allow both functions to run to completion.

    # Set up two registries pointing to the same DB
    butler1 = self.make_butler()
    butler2 = butler1.clone()
    registry1 = butler1._registry
    assert isinstance(registry1, SqlRegistry)
    registry2 = butler2._registry

    # A registry without a '_db' attribute cannot be checked for an
    # in-memory database; in that case the test simply proceeds.
    with contextlib.suppress(AttributeError):
        if ":memory:" in str(registry2._db):
            raise unittest.SkipTest("Testing concurrency requires two connections to the same DB.")

    registry1.registerCollection("chain", CollectionType.CHAINED)
    for collection in ["a", "b"]:
        registry1.registerCollection(collection)

    # Arrange for registry1 to block during its critical section, allowing
    # us to detect this and control when it becomes unblocked.
    enter_barrier = Barrier(2, timeout=60)
    exit_barrier = Barrier(2, timeout=60)

    def wait_for_barrier():
        enter_barrier.wait()
        exit_barrier.wait()

    registry1._managers.collections._block_for_concurrency_test = wait_for_barrier

    with ThreadPoolExecutor(max_workers=1) as exec1:
        with ThreadPoolExecutor(max_workers=1) as exec2:
            future1 = exec1.submit(blocked_thread_func, butler1)
            enter_barrier.wait()

            # At this point registry 1 has entered the critical section and
            # is waiting for us to release it. Start the other thread.
            future2 = exec2.submit(unblocked_thread_func, butler2)
            # thread2 should block inside a database call, but we have no
            # way to detect when it is in this state.
            time.sleep(0.200)

            # Let the threads run to completion.
            exit_barrier.wait()
            # result() re-raises any exception from the worker threads.
            future1.result()
            future2.result()

    return registry1
def testBasicTransaction(self):
    """Check that everything done inside a single transaction block is
    rolled back when an exception propagates out of that block.
    """
    registry = self.make_butler().registry
    storage_class = StorageClass("testDatasetType")
    registry.storageClasses.registerStorageClass(storage_class)

    # This transaction completes normally, so its insert is kept.
    with registry.transaction():
        registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})

    # This one is aborted by the exception, so its insert is undone.
    with self.assertRaises(ValueError):
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam2"})
            raise ValueError("Oops, something went wrong")

    # Cam1 should exist.
    cam1_record = registry.expandDataId(instrument="Cam1").records["instrument"]
    self.assertEqual(cam1_record.class_name, "A")
    # Neither Cam2 (rolled back) nor Cam3 (never inserted) should exist.
    for missing in ("Cam2", "Cam3"):
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument=missing)
def testNestedTransaction(self):
    """Check that work done in an outer transaction block survives when
    an exception escapes an inner transaction block and is caught.
    """
    registry = self.make_butler().registry
    dimension = registry.dimensions["instrument"]
    kept_id = {"instrument": "DummyCam"}
    discarded_id = {"instrument": "DummyCam2"}
    checkpointReached = False
    with registry.transaction():
        # Added here and (ultimately) committed.
        registry.insertDimensionData(dimension, kept_id)
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            with registry.transaction(savepoint=True):
                # No conflict, so this succeeds -- but it must not end
                # up committed.
                registry.insertDimensionData(dimension, discarded_id)
                checkpointReached = True
                # Conflicts with the outer insert and raises. That
                # rolls back the insert just above (same transaction
                # context) but not the insert in the outer block.
                registry.insertDimensionData(dimension, kept_id)
        self.assertTrue(checkpointReached)

    expanded = registry.expandDataId(kept_id, dimensions=dimension.minimal_group)
    self.assertIsNotNone(expanded)
    with self.assertRaises(DataIdValueError):
        registry.expandDataId(discarded_id, dimensions=dimension.minimal_group)
def testInstrumentDimensions(self):
    """Test queries involving only instrument dimensions, with no joins to
    skymap.

    Builds a small "DummyCam" instrument with detectors, visits and
    exposures, inserts "RAW" datasets into two runs and a third
    collection, then checks `queryDataIds` results for several
    collection and ``where`` combinations.
    """
    butler = self.make_butler()
    registry = butler.registry

    # need a bunch of dimensions and datasets for test
    registry.insertDimensionData(
        "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
    )
    registry.insertDimensionData("day_obs", dict(instrument="DummyCam", id=20250101))
    registry.insertDimensionData(
        "physical_filter",
        dict(instrument="DummyCam", name="dummy_r", band="r"),
        dict(instrument="DummyCam", name="dummy_i", band="i"),
    )
    # Five detectors, with IDs 1 through 5.
    registry.insertDimensionData(
        "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
    )
    registry.insertDimensionData(
        "visit",
        dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", day_obs=20250101),
        dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", day_obs=20250101),
        dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", day_obs=20250101),
    )
    registry.insertDimensionData(
        "group",
        dict(instrument="DummyCam", name="ten"),
        dict(instrument="DummyCam", name="eleven"),
        dict(instrument="DummyCam", name="twelve"),
    )
    # One visit_detector_region record per (visit, detector) pair.
    for i in range(1, 6):
        registry.insertDimensionData(
            "visit_detector_region",
            dict(instrument="DummyCam", visit=10, detector=i),
            dict(instrument="DummyCam", visit=11, detector=i),
            dict(instrument="DummyCam", visit=20, detector=i),
        )
    # Two exposures for each of the three groups.
    registry.insertDimensionData(
        "exposure",
        dict(
            instrument="DummyCam",
            id=100,
            obs_id="100",
            physical_filter="dummy_i",
            group="ten",
            day_obs=20250101,
        ),
        dict(
            instrument="DummyCam",
            id=101,
            obs_id="101",
            physical_filter="dummy_i",
            group="ten",
            day_obs=20250101,
        ),
        dict(
            instrument="DummyCam",
            id=110,
            obs_id="110",
            physical_filter="dummy_r",
            group="eleven",
            day_obs=20250101,
        ),
        dict(
            instrument="DummyCam",
            id=111,
            obs_id="111",
            physical_filter="dummy_r",
            group="eleven",
            day_obs=20250101,
        ),
        dict(
            instrument="DummyCam",
            id=200,
            obs_id="200",
            physical_filter="dummy_r",
            group="twelve",
            day_obs=20250101,
        ),
        dict(
            instrument="DummyCam",
            id=201,
            obs_id="201",
            physical_filter="dummy_r",
            group="twelve",
            day_obs=20250101,
        ),
    )
    # Map each exposure to its visit: 100/101 -> 10, 110/111 -> 11,
    # 200/201 -> 20.
    registry.insertDimensionData(
        "visit_definition",
        dict(instrument="DummyCam", exposure=100, visit=10),
        dict(instrument="DummyCam", exposure=101, visit=10),
        dict(instrument="DummyCam", exposure=110, visit=11),
        dict(instrument="DummyCam", exposure=111, visit=11),
        dict(instrument="DummyCam", exposure=200, visit=20),
        dict(instrument="DummyCam", exposure=201, visit=20),
    )
    # dataset types
    run1 = "test1_r"
    run2 = "test2_r"
    tagged2 = "test2_t"
    registry.registerRun(run1)
    registry.registerRun(run2)
    registry.registerCollection(tagged2)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    rawType = DatasetType(
        name="RAW",
        dimensions=registry.dimensions.conform(("instrument", "exposure", "detector")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(rawType)
    calexpType = DatasetType(
        name="CALEXP",
        dimensions=registry.dimensions.conform(("instrument", "visit", "detector")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(calexpType)

    # add pre-existing datasets
    for exposure in (100, 101, 110, 111):
        for detector in (1, 2, 3):
            # note that only 3 of 5 detectors have datasets
            dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
            (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
            # exposures 100 and 101 appear in both run1 and tagged2.
            # 100 has different datasets in the different collections
            # 101 has the same dataset in both collections.
            if exposure == 100:
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
            if exposure in (100, 101):
                registry.associate(tagged2, [ref])
    # Add pre-existing datasets to tagged2.
    for exposure in (200, 201):
        for detector in (3, 4, 5):
            # note that only 3 of 5 detectors have datasets
            dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
            (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
            registry.associate(tagged2, [ref])

    # Union of the required dimensions of both dataset types.
    dimensions = registry.dimensions.conform(rawType.dimensions.required | calexpType.dimensions.required)
    # Test that single dim string works as well as list of str
    rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
    rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
    self.assertEqual(rows, rowsI)
    # with empty expression
    rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
    self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
    self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111))
    self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11))
    self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    # second collection
    rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
    self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
    for dataId in rows:
        self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
    self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 200, 201))
    self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 20))
    self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

    # with two input collections
    rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
    self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
    for dataId in rows:
        self.assertCountEqual(dataId.dimensions.required, ("instrument", "detector", "exposure", "visit"))
    self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101, 110, 111, 200, 201))
    self.assertCountEqual({dataId["visit"] for dataId in rows}, (10, 11, 20))
    self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3, 4, 5))

    # limit to single visit
    rows = registry.queryDataIds(
        dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
    ).toSet()
    self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
    self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
    self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
    self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))

    # more limiting expression, using link names instead of Table.column
    rows = registry.queryDataIds(
        dimensions,
        datasets=rawType,
        collections=run1,
        where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
    ).toSet()
    self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
    self.assertCountEqual({dataId["exposure"] for dataId in rows}, (100, 101))
    self.assertCountEqual({dataId["visit"] for dataId in rows}, (10,))
    self.assertCountEqual({dataId["detector"] for dataId in rows}, (2, 3))

    # queryDataIds with only one of `datasets` and `collections` is an
    # error.
    with self.assertRaises(CollectionError):
        registry.queryDataIds(dimensions, datasets=rawType)
    with self.assertRaises(ArgumentError):
        registry.queryDataIds(dimensions, collections=run1)

    # expression excludes everything
    rows = registry.queryDataIds(
        dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
    ).toSet()
    self.assertEqual(len(rows), 0)

    # Selecting by physical_filter, this is not in the dimensions, but it
    # is a part of the full expression so it should work too.
    rows = registry.queryDataIds(
        dimensions,
        datasets=rawType,
        collections=run1,
        where="physical_filter = 'dummy_r'",
        instrument="DummyCam",
    ).toSet()
    self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
    self.assertCountEqual({dataId["exposure"] for dataId in rows}, (110, 111))
    self.assertCountEqual({dataId["visit"] for dataId in rows}, (11,))
    self.assertCountEqual({dataId["detector"] for dataId in rows}, (1, 2, 3))
def testSkyMapDimensions(self):
    """Exercise data ID queries that touch only skymap dimensions.

    Datasets are keyed by skymap, tract, patch and (for some dataset
    types) band; no instrument-related dimensions appear in the queried
    dimensions.
    """
    butler = self.make_butler()
    registry = butler.registry

    # Dimension records needed by the test.  "band" is one of the dataset
    # dimensions, so physical_filter records (and their instrument) are
    # inserted too.
    registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
    registry.insertDimensionData(
        "physical_filter",
        {"instrument": "DummyCam", "name": "dummy_r", "band": "r"},
        {"instrument": "DummyCam", "name": "dummy_i", "band": "i"},
    )
    registry.insertDimensionData("skymap", {"name": "DummyMap", "hash": b"sha!"})
    for tract_id in range(10):
        registry.insertDimensionData("tract", {"skymap": "DummyMap", "id": tract_id})
        patch_records = [
            {"skymap": "DummyMap", "tract": tract_id, "id": patch_id, "cell_x": 0, "cell_y": 0}
            for patch_id in range(10)
        ]
        registry.insertDimensionData("patch", *patch_records)

    # Run collection, storage class and dataset types.
    run = "tésτ"
    registry.registerRun(run)
    storageClass = StorageClass("testDataset")
    registry.storageClasses.registerStorageClass(storageClass)
    conform = registry.dimensions.conform
    calexpType = DatasetType(
        name="deepCoadd_calexp",
        dimensions=conform(("skymap", "tract", "patch", "band")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(calexpType)
    mergeType = DatasetType(
        name="deepCoadd_mergeDet",
        dimensions=conform(("skymap", "tract", "patch")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(mergeType)
    measType = DatasetType(
        name="deepCoadd_meas",
        dimensions=conform(("skymap", "tract", "patch", "band")),
        storageClass=storageClass,
    )
    registry.registerDatasetType(measType)

    # Union of the required dimensions of all three dataset types.
    dimensions = conform(
        calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
    )

    # Pre-existing datasets: one mergeDet per (tract, patch) and one calexp
    # per (tract, patch, band).
    for tract_id, patch_id in itertools.product((1, 3, 5), (2, 4, 6, 7)):
        patch_data_id = {"skymap": "DummyMap", "tract": tract_id, "patch": patch_id}
        registry.insertDatasets(mergeType, dataIds=[patch_data_id], run=run)
        for band in ("i", "r"):
            registry.insertDatasets(calexpType, dataIds=[{**patch_data_id, "band": band}], run=run)

    def distinct(data_ids, key):
        """Return the set of values of ``key`` across ``data_ids``."""
        return {data_id[key] for data_id in data_ids}

    # No ``where`` expression at all.
    rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
    self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
    for data_id in rows:
        self.assertCountEqual(data_id.dimensions.required, ("skymap", "tract", "patch", "band"))
    self.assertCountEqual(distinct(rows, "tract"), (1, 3, 5))
    self.assertCountEqual(distinct(rows, "patch"), (2, 4, 6, 7))
    self.assertCountEqual(distinct(rows, "band"), ("i", "r"))

    # Restrict to 2 tracts and 2 patches.
    rows = registry.queryDataIds(
        dimensions,
        datasets=[calexpType, mergeType],
        collections=run,
        where="tract IN (1, 5) AND patch IN (2, 7)",
        skymap="DummyMap",
    ).toSet()
    self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
    self.assertCountEqual(distinct(rows, "tract"), (1, 5))
    self.assertCountEqual(distinct(rows, "patch"), (2, 7))
    self.assertCountEqual(distinct(rows, "band"), ("i", "r"))

    # Restrict to a single filter.
    rows = registry.queryDataIds(
        dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
    ).toSet()
    self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
    self.assertCountEqual(distinct(rows, "tract"), (1, 3, 5))
    self.assertCountEqual(distinct(rows, "patch"), (2, 4, 6, 7))
    self.assertCountEqual(distinct(rows, "band"), ("i",))

    # A skymap name for which no records were inserted yields no rows.
    rows = registry.queryDataIds(
        dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
    ).toSet()
    self.assertEqual(len(rows), 0)
def testSpatialJoin(self):
    """Check data ID queries whose dimensions require spatial overlap joins.

    Expected results are computed by brute force from the regions stored
    in the dimension records and compared with what `queryDataIds`
    returns.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml")

    # ``families`` maps TopologicalFamily name -> set of spatial
    # DatabaseDimensionElements in that family; every element of one family
    # is later related to every element of each other family.
    # ``regions`` maps element.name -> {dataId: region}.
    families = defaultdict(set)
    regions = {}
    for element in registry.dimensions.database_elements:
        if element.spatial is None:
            continue
        families[element.spatial.name].add(element)
        regions[element.name] = {
            record.dataId: record.region for record in registry.queryDimensionRecords(element)
        }

    # A failure here is not necessarily a problem - it may just reflect a
    # reasonable change to the default dimension definitions - but the
    # checks below need more than one family to do anything useful.
    self.assertEqual(len(families), 2)

    # Overlap DatabaseDimensionElements from different families.
    for family1, family2 in itertools.combinations(families, 2):
        for element1, element2 in itertools.product(families[family1], families[family2]):
            dimensions = element1.minimal_group | element2.minimal_group
            # Brute-force the expected overlaps from the regions fetched
            # above.
            region_pairs = itertools.product(
                regions[element1.name].items(), regions[element2.name].items()
            )
            expected = {
                DataCoordinate.standardize({**dataId1.required, **dataId2.required}, dimensions=dimensions)
                for (dataId1, region1), (dataId2, region2) in region_pairs
                if not region1.isDisjointFrom(region2)
            }
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(dimensions))
            self.assertEqual(expected, queried)

    # Overlap each DatabaseDimensionElement with the commonSkyPix system.
    commonSkyPix = registry.dimensions.commonSkyPix
    for elementName, these_regions in regions.items():
        dimensions = registry.dimensions[elementName].minimal_group | commonSkyPix.minimal_group
        # Every pixel index in the envelope of a record's region is an
        # expected overlap for that record.
        expected = {
            DataCoordinate.standardize({commonSkyPix.name: index, **dataId.required}, dimensions=dimensions)
            for dataId, region in these_regions.items()
            for begin, end in commonSkyPix.pixelization.envelope(region)
            for index in range(begin, end)
        }
        self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
        queried = set(registry.queryDataIds(dimensions))
        self.assertEqual(expected, queried)
def testAbstractQuery(self):
    """Test that a query listing only the known bands works.

    This is tricky because band is backed by a query against
    physical_filter: two of the three filters inserted here share band
    "i", and the result must contain each band once.
    """
    butler = self.make_butler()
    registry = butler.registry
    registry.insertDimensionData("instrument", {"name": "DummyCam"})
    filter_records = [
        {"instrument": "DummyCam", "name": "dummy_i", "band": "i"},
        {"instrument": "DummyCam", "name": "dummy_i2", "band": "i"},
        {"instrument": "DummyCam", "name": "dummy_r", "band": "r"},
    ]
    registry.insertDimensionData("physical_filter", *filter_records)

    rows = registry.queryDataIds(["band"]).toSet()
    expected = [
        DataCoordinate.standardize(band=band, universe=registry.dimensions) for band in ("i", "r")
    ]
    self.assertCountEqual(rows, expected)
def testAttributeManager(self):
    """Test get/set/delete/items on the registry attribute manager."""
    # Number of attributes with schema versions in a fresh database:
    # 6 managers with 2 records per manager, plus config for dimensions.
    fresh_count = 6 * 2 + 1

    butler = self.make_butler()
    registry = butler._registry
    attributes = registry._managers.attributes

    def attribute_count():
        """Return the number of attributes currently stored."""
        return len(list(attributes.items()))

    # get() on a key that does not exist returns the default.
    self.assertIsNone(attributes.get("attr"))
    self.assertEqual(attributes.get("attr", ""), "")
    self.assertEqual(attributes.get("attr", "Value"), "Value")
    self.assertEqual(attribute_count(), fresh_count)

    # Neither the key nor the value may be empty.
    with self.assertRaises(ValueError):
        attributes.set("", "value")
    with self.assertRaises(ValueError):
        attributes.set("attr", "")

    # Setting a new key adds one attribute.
    attributes.set("attr", "value")
    self.assertEqual(attribute_count(), fresh_count + 1)
    self.assertEqual(attributes.get("attr"), "value")

    # Overwriting an existing key needs force=True.
    with self.assertRaises(ButlerAttributeExistsError):
        attributes.set("attr", "value2")

    attributes.set("attr", "value2", force=True)
    self.assertEqual(attribute_count(), fresh_count + 1)
    self.assertEqual(attributes.get("attr"), "value2")

    # Deleting an existing key reports success and removes it.
    self.assertTrue(attributes.delete("attr"))
    self.assertEqual(attribute_count(), fresh_count)

    # Deleting a missing key reports failure.
    self.assertFalse(attributes.delete("non-attr"))

    # Store several keys and read them all back through items().
    data = {
        "version.core": "1.2.3",
        "version.dimensions": "3.2.1",
        "config.managers.opaque": "ByNameOpaqueTableStorageManager",
    }
    for key, value in data.items():
        attributes.set(key, value)
    stored = dict(attributes.items())
    for key, value in data.items():
        self.assertEqual(stored[key], value)
def testQueryDatasetsDeduplication(self):
    """Test that the findFirst option to queryDatasets selects datasets
    from collections in the order given.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    def find(detector, collection):
        """Look up the Cam1 bias for ``detector`` in ``collection``."""
        return registry.findDataset("bias", instrument="Cam1", detector=detector, collections=collection)

    # Without findFirst, matches from both collections are returned.
    self.assertCountEqual(
        list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
        [find(detector, "imported_g") for detector in (1, 2, 3)]
        + [find(detector, "imported_r") for detector in (2, 3, 4)],
    )
    # With findFirst, the earlier collection in the list wins.
    self.assertCountEqual(
        list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
        [find(detector, "imported_g") for detector in (1, 2, 3)] + [find(4, "imported_r")],
    )
    self.assertCountEqual(
        list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
        [find(1, "imported_g")] + [find(detector, "imported_r") for detector in (2, 3, 4)],
    )

    with self.assertRaises(TypeError):
        # Collection wildcards not allowed in find-first searches because
        # they do not guarantee the ordering of collections.
        registry.queryDatasets("bias", collections="imported_*", findFirst=True)
def testQueryDatasetsExtraDimensions(self):
    """Test queryDatasets with dimensions beyond those of the dataset
    type.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    def query_bias(band):
        """Query bias datasets with "physical_filter" as an extra
        dimension, constrained to ``band``.
        """
        return registry.queryDatasets(
            "bias",
            collections=...,
            dimensions=["physical_filter"],
            dataId={
                "instrument": "Cam1",
                "band": band,
            },
        )

    # Bias dataset type does not include physical filter.  By adding
    # "physical_filter" to dimensions, we are effectively searching here
    # for bias datasets with an instrument that has a specific filter
    # available, even though that filter has nothing to do with the bias
    # datasets we are finding.
    self.assertEqual(0, query_bias("not_a_real_band").count())
    self.assertEqual(6, len(set(query_bias("r"))))
def testQueryResults(self):
    """Test querying for data IDs and then manipulating the QueryResults
    object returned to perform other queries.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    bias = registry.getDatasetType("bias")
    flat = registry.getDatasetType("flat")

    def find_flat(detector, physical_filter):
        """Look up one Cam1 flat in the ``imported_r`` collection."""
        return registry.findDataset(
            flat,
            instrument="Cam1",
            detector=detector,
            physical_filter=physical_filter,
            collections="imported_r",
        )

    def find_bias(detector, collection):
        """Look up one Cam1 bias in ``collection``."""
        return registry.findDataset(bias, instrument="Cam1", detector=detector, collections=collection)

    # Expected results are obtained from methods other than the ones under
    # test:
    # - the dimensions of the data IDs we want to query:
    expected_dimensions = registry.dimensions.conform(["detector", "physical_filter"])
    # - the dimensions of some other data IDs we'll extract from that:
    expected_subset_dimensions = registry.dimensions.conform(["detector"])
    # - the data IDs we expect to obtain from the first queries:
    expectedDataIds = DataCoordinateSet(
        {
            DataCoordinate.standardize(
                instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
            )
            for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
        },
        dimensions=expected_dimensions,
        hasFull=False,
        hasRecords=False,
    )
    # - the flat datasets we expect to find from those data IDs, in just
    #   one collection (so deduplication is irrelevant):
    expectedFlats = [
        find_flat(detector, physical_filter)
        for detector, physical_filter in ((1, "Cam1-R1"), (2, "Cam1-R1"), (3, "Cam1-R2"))
    ]
    # - the data IDs we expect to extract from that:
    expectedSubsetDataIds = expectedDataIds.subset(expected_subset_dimensions)
    # - the bias datasets we expect to find from those data IDs, after we
    #   subset-out the physical_filter dimension, both with duplicates:
    expectedAllBiases = [
        find_bias(detector, collection)
        for detector, collection in (
            (1, "imported_g"),
            (2, "imported_g"),
            (3, "imported_g"),
            (2, "imported_r"),
            (3, "imported_r"),
        )
    ]
    # - ...and without duplicates:
    expectedDeduplicatedBiases = [
        find_bias(detector, collection)
        for detector, collection in ((1, "imported_g"), (2, "imported_r"), (3, "imported_r"))
    ]

    def check_data_ids(results):
        """Check a detector+physical_filter data ID result object and the
        flats found from it.
        """
        self.assertEqual(results.dimensions, expected_dimensions)
        self.assertEqual(results.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(results.findDatasets(flat, collections=["imported_r"])),
            expectedFlats,
        )

    def check_subset_data_ids(results):
        """Check a detector-only data ID result object and the biases
        found from it, first with and then without duplicates.
        """
        self.assertEqual(results.dimensions, expected_subset_dimensions)
        self.assertEqual(results.toSet(), expectedSubsetDataIds)
        for find_first, expected_biases in (
            (False, expectedAllBiases),
            (True, expectedDeduplicatedBiases),
        ):
            self.assertCountEqual(
                list(
                    results.findDatasets(
                        bias, collections=["imported_r", "imported_g"], findFirst=find_first
                    )
                ),
                expected_biases,
            )

    # Test against those expected results, using a "lazy" query for the
    # data IDs (which re-executes that query each time we use it to do
    # something new).
    dataIds = registry.queryDataIds(
        ["detector", "physical_filter"],
        where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
        instrument="Cam1",
    )
    check_data_ids(dataIds)
    subsetDataIds = dataIds.subset(expected_subset_dimensions, unique=True)
    check_subset_data_ids(subsetDataIds)

    # Searching for a dataset with dimensions we had projected away
    # restores those dimensions.
    self.assertCountEqual(
        list(subsetDataIds.findDatasets("flat", collections=["imported_r"], findFirst=True)),
        expectedFlats,
    )

    # A dataset type that does not exist must be reported by name, whether
    # it is passed as a DatasetType object or as a string.
    unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
    for test_type in (unknown_type, unknown_type.name):
        with self.assertRaisesRegex(DatasetTypeError, expected_regex=unknown_type.name):
            list(
                subsetDataIds.findDatasets(
                    test_type, collections=["imported_r", "imported_g"], findFirst=True
                )
            )

    # Materialize the data ID subset query, but not the dataset queries.
    with subsetDataIds.materialize() as materialized_subset:
        check_subset_data_ids(materialized_subset)

    # Materialize the original query, but none of the follow-up queries.
    with dataIds.materialize() as materialized_data_ids:
        check_data_ids(materialized_data_ids)
        subset_of_materialized = materialized_data_ids.subset(expected_subset_dimensions, unique=True)
        check_subset_data_ids(subset_of_materialized)
        # Materialize the subset data ID query, but not the dataset
        # queries.
        with subset_of_materialized.materialize() as materialized_subset:
            check_subset_data_ids(materialized_subset)
def testStorageClassPropagation(self):
    """Test that queries for datasets respect the storage class passed in
    as part of a full dataset type.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")

    # Register the dataset type with one storage class and insert a single
    # dataset of that type.
    registered_type = DatasetType(
        "tbl", dimensions=["instrument"], storageClass="Packages", universe=registry.dimensions
    )
    registry.registerDatasetType(registered_type)
    run = "run1"
    registry.registerRun(run)
    (inserted_ref,) = registry.insertDatasets(
        registered_type, [registry.expandDataId(instrument="Cam1")], run=run
    )
    self.assertEqual(inserted_ref.datasetType, registered_type)

    # Same name and dimensions, different storage class.
    requested_type = DatasetType(
        "tbl", dimensions=["instrument"], storageClass="StructuredDataDict", universe=registry.dimensions
    )
    self.assertNotEqual(registered_type, requested_type)

    # queryDatasets
    datasets_result = registry.queryDatasets(requested_type, collections=[run])
    self.assertEqual(datasets_result.parentDatasetType, requested_type)  # type: ignore
    (ref_from_query_datasets,) = datasets_result
    self.assertEqual(ref_from_query_datasets.datasetType, requested_type)

    # queryDataIds(...).findDatasets
    find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
        requested_type, collections=[run]
    )
    self.assertEqual(find_datasets_result.parentDatasetType, requested_type)
    (ref_from_find_datasets,) = find_datasets_result
    self.assertEqual(ref_from_find_datasets.datasetType, requested_type)

    # queryDatasetTypes
    dataset_types_result = registry.queryDatasetTypes(requested_type)
    self.assertEqual(list(dataset_types_result), [requested_type])

    # findDataset
    ref_from_find_dataset = registry.findDataset(requested_type, instrument="Cam1", collections=[run])
    self.assertEqual(ref_from_find_dataset.datasetType, requested_type)
def testEmptyDimensionsQueries(self):
    """Test Query and QueryResults objects in the case where there are no
    dimensions.
    """
    # Set up test data: one dataset type, two runs, one dataset in each.
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
    registry.registerDatasetType(schema)
    dataId = DataCoordinate.make_empty(registry.dimensions)
    run1 = "run1"
    run2 = "run2"
    registry.registerRun(run1)
    registry.registerRun(run2)
    (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
    (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)

    def check_empty_data_ids(results, *, check_all=True):
        """Check that ``results`` holds only the empty data ID and that
        dataset searches from it find the expected datasets.

        With ``check_all=False`` the ``findFirst=False`` search is skipped.
        """
        self.checkQueryResults(results, [dataId])
        if check_all:
            self.checkQueryResults(
                results.findDatasets(schema, collections=[run1, run2], findFirst=False),
                [dataset1, dataset2],
            )
        self.checkQueryResults(
            results.findDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            results.findDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )

    # Query directly for both of the datasets, and each one, one at a time.
    self.checkQueryResults(
        registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
    )
    self.checkQueryResults(
        registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
        [dataset1],
    )
    self.checkQueryResults(
        registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
        [dataset2],
    )

    # Query for data IDs with no dimensions, and use them to find the
    # datasets.
    dataIds = registry.queryDataIds([])
    check_empty_data_ids(dataIds)
    # Now materialize the data ID query results and repeat those tests
    # (the findFirst=False search is not exercised in this pass).
    with dataIds.materialize() as materialized:
        check_empty_data_ids(materialized, check_all=False)

    # Query for non-empty data IDs, then subset that to get the empty one.
    # Repeat the above tests starting from that.
    dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
    check_empty_data_ids(dataIds)
    with dataIds.materialize() as materialized:
        check_empty_data_ids(materialized)

    # Query for non-empty data IDs, then materialize, then subset to get
    # the empty one.  Repeat again.
    with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
        dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
        check_empty_data_ids(dataIds)
        with dataIds.materialize() as materialized:
            check_empty_data_ids(materialized)

    # Query for non-empty data IDs with a constraint on an empty-data-ID
    # dataset that exists.
    dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
    self.checkQueryResults(
        dataIds.subset(unique=True),
        [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
    )

    # Again query for non-empty data IDs with a constraint on empty-data-ID
    # datasets, but when the datasets don't exist.  We delete the existing
    # dataset and query just that collection rather than creating a new
    # empty collection because this is a bit less likely for our build-time
    # logic to shortcut-out (via the collection summaries), and such a
    # shortcut would make this test a bit more trivial than we'd like.
    registry.removeDatasets([dataset2])
    dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
    self.checkQueryResults(dataIds, [])
2226 def testDimensionDataModifications(self):
2227 """Test that modifying dimension records via:
2228 syncDimensionData(..., update=True) and
2229 insertDimensionData(..., replace=True) works as expected, even in the
2230 presence of datasets using those dimensions and spatial overlap
2231 relationships.
2232 """
2234 def _unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
2235 """Unpack a sphgeom.RangeSet into the integers it contains."""
2236 for begin, end in ranges:
2237 yield from range(begin, end)
2239 def _range_set_hull(
2240 ranges: lsst.sphgeom.RangeSet,
2241 pixelization: lsst.sphgeom.HtmPixelization,
2242 ) -> lsst.sphgeom.ConvexPolygon:
2243 """Create a ConvexPolygon hull of the region defined by a set of
2244 HTM pixelization index ranges.
2245 """
2246 points = []
2247 for index in _unpack_range_set(ranges):
2248 points.extend(pixelization.triangle(index).getVertices())
2249 return lsst.sphgeom.ConvexPolygon(points)
2251 # Use HTM to set up an initial parent region (one arbitrary trixel)
2252 # and four child regions (the trixels within the parent at the next
2253 # level. We'll use the parent as a tract/visit region and the children
2254 # as its patch/visit_detector regions.
2255 butler = self.make_butler()
2256 registry = butler.registry
2257 htm6 = registry.dimensions.skypix["htm"][6].pixelization
2258 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
2259 index = 12288
2260 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
2261 assert htm6.universe().contains(child_ranges_small)
2262 child_regions_small = [htm6.triangle(i) for i in _unpack_range_set(child_ranges_small)]
2263 parent_region_small = lsst.sphgeom.ConvexPolygon(
2264 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
2265 )
2266 assert all(parent_region_small.contains(c) for c in child_regions_small)
2267 # Make a larger version of each child region, defined to be the set of
2268 # htm6 trixels that overlap the original's bounding circle. Make a new
2269 # parent that's the convex hull of the new children.
2270 child_regions_large = [
2271 _range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
2272 ]
2273 assert all(
2274 large.contains(small)
2275 for large, small in zip(child_regions_large, child_regions_small, strict=True)
2276 )
2277 parent_region_large = lsst.sphgeom.ConvexPolygon(
2278 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
2279 )
2280 assert all(parent_region_large.contains(c) for c in child_regions_large)
2281 assert parent_region_large.contains(parent_region_small)
2282 assert not parent_region_small.contains(parent_region_large)
2283 assert not all(parent_region_small.contains(c) for c in child_regions_large)
2284 # Find some commonSkyPix indices that overlap the large regions but not
2285 # overlap the small regions. We use commonSkyPix here to make sure the
2286 # real tests later involve what's in the database, not just post-query
2287 # filtering of regions.
2288 child_difference_indices = []
2289 for large, small in zip(child_regions_large, child_regions_small, strict=True):
2290 difference = list(_unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
2291 assert difference, "if this is empty, we can't test anything useful with these regions"
2292 assert all(
2293 not commonSkyPix.triangle(d).isDisjointFrom(large)
2294 and commonSkyPix.triangle(d).isDisjointFrom(small)
2295 for d in difference
2296 )
2297 child_difference_indices.append(difference)
2298 parent_difference_indices = list(
2299 _unpack_range_set(
2300 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
2301 )
2302 )
2303 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
2304 assert all(
2305 (
2306 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
2307 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
2308 )
2309 for d in parent_difference_indices
2310 )
2311 # Now that we've finally got those regions, we'll insert the large ones
2312 # as tract/patch dimension records.
2313 skymap_name = "testing_v1"
2314 registry.insertDimensionData(
2315 "skymap",
2316 {
2317 "name": skymap_name,
2318 "hash": bytes([42]),
2319 "tract_max": 1,
2320 "patch_nx_max": 2,
2321 "patch_ny_max": 2,
2322 },
2323 )
2324 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
2325 registry.insertDimensionData(
2326 "patch",
2327 *[
2328 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2329 for n, c in enumerate(child_regions_large)
2330 ],
2331 )
2332 # Add at dataset that uses these dimensions to make sure that modifying
2333 # them doesn't disrupt foreign keys (need to make sure DB doesn't
2334 # implement insert with replace=True as delete-then-insert).
2335 dataset_type = DatasetType(
2336 "coadd",
2337 dimensions=["tract", "patch"],
2338 universe=registry.dimensions,
2339 storageClass="Exposure",
2340 )
2341 registry.registerDatasetType(dataset_type)
2342 registry.registerCollection("the_run", CollectionType.RUN)
2343 registry.insertDatasets(
2344 dataset_type,
2345 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
2346 run="the_run",
2347 )
2348 # Query for tracts and patches that overlap some "difference" htm9
2349 # pixels; there should be overlaps, because the database has
2350 # the "large" suite of regions.
2351 self.assertEqual(
2352 {0},
2353 {
2354 data_id["tract"]
2355 for data_id in registry.queryDataIds(
2356 ["tract"],
2357 skymap=skymap_name,
2358 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2359 )
2360 },
2361 )
2362 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2363 self.assertIn(
2364 patch_id,
2365 {
2366 data_id["patch"]
2367 for data_id in registry.queryDataIds(
2368 ["patch"],
2369 skymap=skymap_name,
2370 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2371 )
2372 },
2373 )
2374 # Use sync to update the tract region and insert to update the regions
2375 # of the patches, to the "small" suite.
2376 updated = registry.syncDimensionData(
2377 "tract",
2378 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
2379 update=True,
2380 )
2381 self.assertEqual(updated, {"region": parent_region_large})
2382 registry.insertDimensionData(
2383 "patch",
2384 *[
2385 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2386 for n, c in enumerate(child_regions_small)
2387 ],
2388 replace=True,
2389 )
2390 # Query again; there now should be no such overlaps, because the
2391 # database has the "small" suite of regions.
2392 self.assertFalse(
2393 set(
2394 registry.queryDataIds(
2395 ["tract"],
2396 skymap=skymap_name,
2397 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2398 )
2399 )
2400 )
2401 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2402 self.assertNotIn(
2403 patch_id,
2404 {
2405 data_id["patch"]
2406 for data_id in registry.queryDataIds(
2407 ["patch"],
2408 skymap=skymap_name,
2409 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2410 )
2411 },
2412 )
2413 # Update back to the large regions and query one more time.
2414 updated = registry.syncDimensionData(
2415 "tract",
2416 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
2417 update=True,
2418 )
2419 self.assertEqual(updated, {"region": parent_region_small})
2420 registry.insertDimensionData(
2421 "patch",
2422 *[
2423 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2424 for n, c in enumerate(child_regions_large)
2425 ],
2426 replace=True,
2427 )
2428 self.assertEqual(
2429 {0},
2430 {
2431 data_id["tract"]
2432 for data_id in registry.queryDataIds(
2433 ["tract"],
2434 skymap=skymap_name,
2435 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2436 )
2437 },
2438 )
2439 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2440 self.assertIn(
2441 patch_id,
2442 {
2443 data_id["patch"]
2444 for data_id in registry.queryDataIds(
2445 ["patch"],
2446 skymap=skymap_name,
2447 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2448 )
2449 },
2450 )
def testCalibrationCollections(self):
    """Exercise `~CollectionType.CALIBRATION` collections end to end.

    Covers `SqlRegistry.certify`, `SqlRegistry.decertify`,
    `SqlRegistry.findDataset`, and
    `DataCoordinateQueryResults.findRelatedDatasets`.
    """
    # Start from a registry whose datasets live only in non-calibration
    # collections.
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # Five TAI timestamps, one hour apart.
    t1, t2, t3, t4, t5 = (
        astropy.time.Time(stamp, format="isot", scale="tai")
        for stamp in (
            "2020-01-01T01:00:00",
            "2020-01-01T02:00:00",
            "2020-01-01T03:00:00",
            "2020-01-01T04:00:00",
            "2020-01-01T05:00:00",
        )
    )
    # Every timespan that can be formed from an ordered pair of those
    # bounds; None at either end of the list stands for "unbounded".
    bounds = [None, t1, t2, t3, t4, t5, None]
    every_timespan = [Timespan(begin, end) for begin, end in itertools.combinations(bounds, r=2)]

    # One exposure per consecutive pair of timestamps, plus the day_obs
    # and group records those exposures refer to.
    registry.insertDimensionData(
        "day_obs", {"instrument": "Cam1", "id": 20200101, "timespan": Timespan(t1, t5)}
    )
    group_names = ("group0", "group1", "group2", "group3")
    registry.insertDimensionData("group", *({"instrument": "Cam1", "name": name} for name in group_names))
    exposure_rows = (
        (0, "group0", "zero", Timespan(t1, t2)),
        (1, "group1", "one", Timespan(t2, t3)),
        (2, "group2", "two", Timespan(t3, t4)),
        (3, "group3", "three", Timespan(t4, t5)),
    )
    registry.insertDimensionData(
        "exposure",
        *(
            {
                "instrument": "Cam1",
                "id": exposure_id,
                "group": group,
                "obs_id": obs_id,
                "physical_filter": "Cam1-G",
                "day_obs": 20200101,
                "timespan": exposure_timespan,
            }
            for exposure_id, group, obs_id, exposure_timespan in exposure_rows
        ),
    )

    def find_bias(detector, run):
        """Look up the Cam1 bias for ``detector`` in collection ``run``."""
        return registry.findDataset("bias", instrument="Cam1", detector=detector, collections=run)

    def cam1_data_id(**dimensions):
        """Expand a Cam1 data ID with the given extra dimension values."""
        return registry.expandDataId(instrument="Cam1", **dimensions)

    # References to the datasets we will certify.
    bias2a = find_bias(2, "imported_g")
    bias3a = find_bias(3, "imported_g")
    bias2b = find_bias(2, "imported_r")
    bias3b = find_bias(3, "imported_r")

    # The calibration collection under test.
    collection = "Cam1/calibs/default"
    registry.registerCollection(collection, type=CollectionType.CALIBRATION)
    # A plain associate() carries no timespan, so it must be rejected for
    # a calibration collection.
    with self.assertRaises(CollectionTypeError):
        registry.associate(collection, [bias2a])
    # Certify 2a over [t2, t4).
    registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))

    # The certified dataset must be visible through the new collection,
    # alone and in combination with a RUN collection.
    found = set(registry.queryDatasets("bias", findFirst=False, collections=collection))
    self.assertEqual(found, {bias2a})
    found = set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"]))
    self.assertEqual(found, {bias2a, bias2b, bias3b, find_bias(4, "imported_r")})

    detector_ids = set(registry.queryDataIds("detector", datasets="bias", collections=collection))
    self.assertEqual(detector_ids, {cam1_data_id(detector=2)})
    detector_ids = set(
        registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])
    )
    self.assertEqual(
        detector_ids,
        {cam1_data_id(detector=2), cam1_data_id(detector=3), cam1_data_id(detector=4)},
    )

    related = set(
        registry.queryDataIds(["exposure", "detector"]).findRelatedDatasets(
            "bias", findFirst=True, collections=[collection]
        )
    )
    self.assertEqual(
        related,
        {
            (cam1_data_id(detector=2, exposure=1), bias2a),
            (cam1_data_id(detector=2, exposure=2), bias2a),
        },
    )

    # Certifying 2b over anything that overlaps [t2, t4) must conflict
    # with the existing certification of 2a.
    conflicting_bounds = (
        (None, t3),
        (None, t5),
        (t1, t3),
        (t1, t5),
        (t1, None),
        (t2, t3),
        (t2, t5),
        (t2, None),
    )
    for begin, end in conflicting_bounds:
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=begin, end=end))
    # 3a belongs to a different detector, so an overlapping range is fine;
    # certify it over [t1, t3).
    registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
    # Certify 2b and 3b together over [t4, ∞).
    registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

    # The four certifications made so far, as association records.
    certified = (
        DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
        DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
        DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
        DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
    )

    # Fetch every association and compare with what we expect: the RUN
    # memberships (no timespan) plus the certifications above.
    associations = list(
        registry.queryDatasetAssociations(
            "bias",
            collections=[collection, "imported_g", "imported_r"],
        )
    )
    expected_associations = [
        DatasetAssociation(ref=find_bias(1, "imported_g"), collection="imported_g", timespan=None),
        DatasetAssociation(ref=find_bias(4, "imported_r"), collection="imported_r", timespan=None),
        DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
        DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
        DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
        DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
        *certified,
    ]
    self.assertCountEqual(associations, expected_associations)

    # Association query through a chained collection.  Regression test
    # for DM-53179; also verifies that the flattenChains parameter has
    # never had any effect.
    butler.collections.register("chain", CollectionType.CHAINED)
    butler.collections.redefine_chain("chain", [collection])
    for flatten in (False, True):
        self.assertCountEqual(
            list(registry.queryDatasetAssociations("bias", collections=["chain"], flattenChains=flatten)),
            certified,
        )

    class Ambiguous:
        """Marker for lookups that are expected to be ambiguous."""

    def check_lookup(detector: int, timespan: Timespan, expected: DatasetRef | type[Ambiguous] | None) -> None:
        """Assert that a bias lookup in the calibration collection gives
        ``expected``, or raises when ``expected`` is `Ambiguous`.
        """

        def lookup():
            return registry.findDataset(
                "bias",
                collections=collection,
                instrument="Cam1",
                detector=detector,
                timespan=timespan,
            )

        if expected is not Ambiguous:
            self.assertEqual(expected, lookup())
            return
        with self.assertRaises((DatasetTypeError, LookupError)):
            lookup()

    def check_lookup_table(rows):
        """Check all rows for detector 2, then all rows for detector 3.

        Each row is ``(begin, end, expected for detector 2, expected for
        detector 3)``.
        """
        for begin, end, expected_2, _ in rows:
            check_lookup(detector=2, timespan=Timespan(begin, end), expected=expected_2)
        for begin, end, _, expected_3 in rows:
            check_lookup(detector=3, timespan=Timespan(begin, end), expected=expected_3)

    # Systematically test lookups against expected results.
    check_lookup_table(
        [
            (None, t1, None, None),
            (None, t2, None, bias3a),
            (None, t3, bias2a, bias3a),
            (None, t4, bias2a, bias3a),
            (None, t5, Ambiguous, Ambiguous),
            (None, None, Ambiguous, Ambiguous),
            (t1, t2, None, bias3a),
            (t1, t3, bias2a, bias3a),
            (t1, t4, bias2a, bias3a),
            (t1, t5, Ambiguous, Ambiguous),
            (t1, None, Ambiguous, Ambiguous),
            (t2, t3, bias2a, bias3a),
            (t2, t4, bias2a, bias3a),
            (t2, t5, Ambiguous, Ambiguous),
            (t2, None, Ambiguous, Ambiguous),
            (t3, t4, bias2a, None),
            (t3, t5, Ambiguous, bias3b),
            (t3, None, Ambiguous, bias3b),
            (t4, t5, bias2b, bias3b),
            (t4, None, bias2b, bias3b),
            (t5, None, bias2b, bias3b),
        ]
    )

    # Lookups via temporal joins to exposures.
    joined_expectations = (
        (2, ((1, bias2a), (2, bias2a), (3, bias2b))),
        (3, ((0, bias3a), (1, bias3a), (3, bias3b))),
    )
    for detector, exposure_refs in joined_expectations:
        related = set(
            registry.queryDataIds(
                ["exposure", "detector"], instrument="Cam1", detector=detector
            ).findRelatedDatasets("bias", collections=[collection])
        )
        self.assertEqual(
            related,
            {(cam1_data_id(exposure=exposure, detector=detector), ref) for exposure, ref in exposure_refs},
        )

    # Decertify [t3, t5) for all data IDs, and repeat the lookups.
    # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
    # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
    registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
    check_lookup_table(
        [
            (None, t1, None, None),
            (None, t2, None, bias3a),
            (None, t3, bias2a, bias3a),
            (None, t4, bias2a, bias3a),
            (None, t5, bias2a, bias3a),
            (None, None, Ambiguous, Ambiguous),
            (t1, t2, None, bias3a),
            (t1, t3, bias2a, bias3a),
            (t1, t4, bias2a, bias3a),
            (t1, t5, bias2a, bias3a),
            (t1, None, Ambiguous, Ambiguous),
            (t2, t3, bias2a, bias3a),
            (t2, t4, bias2a, bias3a),
            (t2, t5, bias2a, bias3a),
            (t2, None, Ambiguous, Ambiguous),
            (t3, t4, None, None),
            (t3, t5, None, None),
            (t3, None, bias2b, bias3b),
            (t4, t5, None, None),
            (t4, None, bias2b, bias3b),
            (t5, None, bias2b, bias3b),
        ]
    )

    # Decertify everything, this time with explicit data IDs; afterwards
    # no lookup should find anything.
    registry.decertify(
        collection,
        "bias",
        Timespan(None, None),
        dataIds=[
            {"instrument": "Cam1", "detector": 2},
            {"instrument": "Cam1", "detector": 3},
        ],
    )
    for detector, timespan in itertools.product((2, 3), every_timespan):
        check_lookup(detector=detector, timespan=timespan, expected=None)

    # Certify bias2a and bias3a over (-∞, ∞); every lookup should now
    # return them.
    registry.certify(
        collection,
        [bias2a, bias3a],
        Timespan(None, None),
    )
    for timespan in every_timespan:
        check_lookup(detector=2, timespan=timespan, expected=bias2a)
        check_lookup(detector=3, timespan=timespan, expected=bias3a)

    # Decertify just detector 2 over [t2, t4).
    # This should split a single certification row into two (and leave the
    # other existing row, for bias3a, alone).
    registry.decertify(
        collection, "bias", Timespan(t2, t4), dataIds=[{"instrument": "Cam1", "detector": 2}]
    )
    for timespan in every_timespan:
        check_lookup(detector=3, timespan=timespan, expected=bias3a)
        hits_before = timespan.overlaps(Timespan(None, t2))
        hits_after = timespan.overlaps(Timespan(t4, None))
        if not (hits_before or hits_after):
            # Entirely inside the decertified hole.
            expected = None
        elif hits_before and hits_after:
            # Touches both remaining pieces of the split certification.
            expected = Ambiguous
        else:
            expected = bias2a
        check_lookup(detector=2, timespan=timespan, expected=expected)
def testSkipCalibs(self):
    """Check how queries behave when calibration collections have to be
    skipped.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    calib_collection = "Cam1/calibs/default"
    registry.registerCollection(calib_collection, type=CollectionType.CALIBRATION)

    # Put every bias into the calibration collection.  Otherwise the
    # logic that prunes dataset subqueries based on datasetType-collection
    # summary information fires before the logic under test.  That
    # pruning is a good thing (it avoids the dreaded NotImplementedError
    # a bit more often) everywhere but here.
    every_bias = registry.queryDatasets("bias", collections=...)
    registry.certify(calib_collection, every_bias, Timespan(None, None))

    search_path = [calib_collection, "imported_g", "imported_r"]
    chain_name = "Cam1/chain"
    registry.registerCollection(chain_name, type=CollectionType.CHAINED)
    registry.setCollectionChain(chain_name, search_path)

    def count_biases(collections):
        """Return how many bias datasets a query over ``collections``
        yields.
        """
        return len(list(registry.queryDatasets("bias", collections=collections)))

    # The lookup is ambiguous because the calibration collection holds
    # multiple datasets with the same data ID.
    with self.assertRaises(CalibrationLookupError):
        list(registry.queryDatasets("bias", collections=search_path, findFirst=True))

    # chain will skip
    self.assertGreater(count_biases(chain_name), 0)

    chained_data_ids = list(
        registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain_name)
    )
    self.assertGreater(len(chained_data_ids), 0)

    # glob will skip too
    self.assertGreater(count_biases("*d*"), 0)

    # regular expression will skip too
    if self.supportsCollectionRegex:
        any_name = re.compile(".*")
        with self.assertWarns(FutureWarning):
            regex_matches = list(registry.queryDatasets("bias", collections=any_name))
        self.assertGreater(len(regex_matches), 0)

    # ellipsis should work as usual
    self.assertGreater(count_biases(...), 0)

    # New query system correctly determines that this search is
    # ambiguous, because there are multiple datasets with the same
    # {instrument=Cam1, detector=2} data ID in the calibration
    # collection at the beginning of the chain.
    with self.assertRaises(CalibrationLookupError):
        list(registry.queryDatasets("bias", collections=chain_name, findFirst=True))
def testIngestTimeQuery(self):
    """Test ``ingest_date`` constraints in dataset queries, written both
    as time literals and as bound values.
    """
    butler = self.make_butler()
    registry = butler.registry
    # Bracket the data load with wall-clock timestamps.
    dt0 = datetime.datetime.now(datetime.UTC)
    self.load_data(butler, "base.yaml", "datasets.yaml")
    dt1 = datetime.datetime.now(datetime.UTC)

    def count(**constraints):
        """Return the number of datasets of any type, in any collection,
        that satisfy the given query keyword arguments.
        """
        return len(list(registry.queryDatasets(..., collections=..., **constraints)))

    total = count()
    self.assertGreater(total, 0)

    # Everything was ingested after a date well in the past, whichever
    # side of the comparison the literal is on.
    for where in ("ingest_date > T'2000-01-01'", "T'2000-01-01' < ingest_date"):
        self.assertEqual(total, count(where=where))

    # Nothing was ingested after a date well in the future.
    for where in ("ingest_date > T'2050-01-01'", "T'2050-01-01' < ingest_date"):
        self.assertEqual(count(where=where), 0)

    # Check more exact timing to make sure there is no 37 seconds offset
    # (after fixing DM-30124).  SQLite time precision is 1 second, so stay
    # a full second clear of the load on either side.
    margin = timedelta(seconds=1)
    # Each case is (timestamp, operator, expected number of datasets).
    cases = [
        (dt0 - margin, ">", total),
        (dt0 - margin, "<", 0),
        (dt1 + margin, "<", total),
        (dt1 + margin, ">", 0),
    ]
    for dt, op, expected_count in cases:
        # Timestamp written inline as a time literal.
        dt_str = dt.isoformat(sep=" ")
        self.assertEqual(count(where=f"ingest_date {op} T'{dt_str}'"), expected_count)

        # Same comparison with the timestamp bound as a datetime...
        bound_where = f"ingest_date {op} :ingest_time"
        self.assertEqual(count(where=bound_where, bind={"ingest_time": dt}), expected_count)

        # ...and bound as an astropy Time.
        dt_astropy = astropy.time.Time(dt, format="datetime")
        self.assertEqual(count(where=bound_where, bind={"ingest_time": dt_astropy}), expected_count)
def testTimespanQueries(self):
    """Exercise timespan ``OVERLAPS`` expressions in data ID queries."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "ci_hsc-subset.yaml")
    # Every visit in the database, as a mapping from ID to timespan.
    visits = {}
    for record in registry.queryDimensionRecords("visit"):
        visits[record.id] = record.timespan
    # The IDs in sorted order; the ordering assertion on t1..t4 below
    # relies on this also being temporal order.
    ids = sorted(visits)
    self.assertEqual(len(ids), 11)
    # Pick some quasi-random indexes into `ids` to play with.
    i1, i2, i3, i4 = (int(len(ids) * fraction) for fraction in (0.1, 0.3, 0.6, 0.8))
    # Derive four times from those visits: just before the beginning of
    # i1 (which should be after the end of the visit before it), exactly
    # the beginning of i2, just after the beginning of i3 (and before its
    # end), and exactly the end of i4.
    one_second = astropy.time.TimeDelta(1.0, format="sec")
    t1 = visits[ids[i1]].begin - one_second
    self.assertGreater(t1, visits[ids[i1 - 1]].end)
    t2 = visits[ids[i2]].begin
    t3 = visits[ids[i3]].begin + one_second
    self.assertLess(t3, visits[ids[i3]].end)
    t4 = visits[ids[i4]].end
    # Make sure those are actually in order.
    self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

    bind_values = {
        "t1": t1,
        "t2": t2,
        "t3": t3,
        "t4": t4,
        "ts23": Timespan(t2, t3),
    }

    def matching_visits(where):
        """Return the visit IDs selected by a query, sorted and
        deduplicated.

        Parameters
        ----------
        where : `str`
            The WHERE clause for the query.
        """
        data_ids = registry.queryDataIds("visit", instrument="HSC", bind=bind_values, where=where)
        return sorted({data_id["visit"] for data_id in data_ids})

    # Try a bunch of timespan queries, mixing up the bounds themselves,
    # where they appear in the expression, and how the timespan gets into
    # the expression.
    range_cases = [
        # t1 is before the start of i1, so this should not include i1.
        (ids[:i1], "visit.timespan OVERLAPS (null, :t1)"),
        # t2 is exactly at the start of i2, but ends are exclusive, so
        # this should not include i2.
        (ids[i1:i2], "(:t1, :t2) OVERLAPS visit.timespan"),
        # t3 is in the middle of i3, so this should include i3.
        (ids[i2 : i3 + 1], "visit.timespan OVERLAPS :ts23"),
        # This range starts inside i3, so i3 is included again; t4 is
        # exactly at the end of i4, so this should include i4.
        (ids[i3 : i4 + 1], f"visit.timespan OVERLAPS (T'{t3.tai.isot}/tai', :t4)"),
        # i4's upper bound of t4 is exclusive, so this should not
        # include i4.
        (ids[i4 + 1 :], "visit.timespan OVERLAPS (:t4, NULL)"),
    ]
    for expected_ids, where in range_cases:
        self.assertEqual(expected_ids, matching_visits(where))

    # Now some timespan vs. time scalar queries.
    only_i3 = ids[i3 : i3 + 1]
    self.assertEqual(only_i3, matching_visits("visit.timespan OVERLAPS :t3"))
    self.assertEqual(only_i3, matching_visits(f"T'{t3.tai.isot}/tai' OVERLAPS visit.timespan"))

    # Empty timespans should not overlap anything.
    self.assertEqual([], matching_visits("visit.timespan OVERLAPS (:t3, :t2)"))

    # Make sure that expanded data IDs include the timespans.
    expanded = list(
        registry.queryDataIds(["visit"], dataId={"instrument": "HSC", "visit": ids[1]}).expanded()
    )
    self.assertEqual(len(expanded), 1)
    data_id = expanded[0]
    expected_timespan = visits[ids[1]]
    self.assertEqual(data_id.timespan, expected_timespan)
    visit_record = data_id.records["visit"]
    assert visit_record is not None
    self.assertEqual(visit_record.timespan, expected_timespan)
    day_obs_record = data_id.records["day_obs"]
    assert day_obs_record is not None
    self.assertEqual(day_obs_record.id, 20130617)
    expected_day = Timespan(
        astropy.time.Time("2013-06-17T00:00:00", scale="tai"),
        astropy.time.Time("2013-06-18T00:00:00", scale="tai"),
    )
    self.assertEqual(day_obs_record.timespan, expected_day)
def testCollectionSummaries(self):
    """Check that collection summaries are recorded when datasets are
    inserted and that the same summaries come back after a refresh.
    """
    self.maxDiff = None
    butler = self.make_butler()
    registry = butler.registry
    # The YAML import is expected to go through the code path that updates
    # collection summaries while datasets are being inserted.
    self.load_data(butler, "base.yaml", "datasets.yaml")
    flat_type = registry.getDatasetType("flat")
    run_summary = CollectionSummary()
    run_summary.dataset_types.add(registry.getDatasetType("bias"))
    cam1_data_id = DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
    run_summary.add_data_ids(flat_type, [cam1_data_id])
    for run_name in ("imported_g", "imported_r"):
        self.assertEqual(registry.getCollectionSummary(run_name), run_summary)
    # A chain over both imported runs must summarize identically, since
    # the union of a summary with itself is that same summary.
    chain_name = "chain"
    registry.registerCollection(chain_name, CollectionType.CHAINED)
    registry.setCollectionChain(chain_name, ["imported_r", "imported_g"])
    self.assertEqual(registry.getCollectionSummary(chain_name), run_summary)
    # Put only the flats into a TAGGED and a CALIBRATION collection so that
    # their summaries differ from those of the runs.
    tag_name = "tag"
    registry.registerCollection(tag_name, CollectionType.TAGGED)
    registry.associate(tag_name, registry.queryDatasets(flat_type, collections="imported_g"))
    calib_name = "calibs"
    registry.registerCollection(calib_name, CollectionType.CALIBRATION)
    unbounded = Timespan(None, None)
    registry.certify(
        calib_name, registry.queryDatasets(flat_type, collections="imported_g"), timespan=unbounded
    )
    flat_only_summary = run_summary.copy()
    flat_only_summary.dataset_types.discard("bias")
    for collection_name in (tag_name, calib_name):
        self.assertEqual(registry.getCollectionSummary(collection_name), flat_only_summary)
    # An explicit refresh() should reload the very same summaries through
    # a different code path than the one used on insert.
    registry.refresh()
    after_refresh = (
        ("imported_g", run_summary),
        ("imported_r", run_summary),
        (tag_name, flat_only_summary),
        (calib_name, flat_only_summary),
    )
    for collection_name, summary in after_refresh:
        self.assertEqual(registry.getCollectionSummary(collection_name), summary)
def testBindInQueryDatasets(self):
    """Check that the ``bind`` mapping is forwarded correctly by
    queryDatasets.

    A query constrained by a keyword data ID value must return the same
    datasets as one constrained by an equivalent ``where`` expression whose
    value is supplied through ``bind``.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    via_keyword = set(registry.queryDatasets("flat", band="r", collections=...))
    via_bind = set(
        registry.queryDatasets("flat", where="band=:my_band", bind={"my_band": "r"}, collections=...)
    )
    self.assertEqual(via_keyword, via_bind)
def testQueryIntRangeExpressions(self):
    """Check integer range expressions used in ``where`` arguments.

    Unlike Python ranges, the stop value of these expressions is
    inclusive.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    # Pairs of (where expression, detector IDs it should select).
    cases = (
        ("detector IN (1..2)", [1, 2]),
        ("detector IN (1..4:2)", [1, 3]),
        ("detector IN (2..4:2)", [2, 4]),
    )
    for where, detectors in cases:
        found = set(registry.queryDataIds(["detector"], instrument="Cam1", where=where))
        wanted = {registry.expandDataId(instrument="Cam1", detector=n) for n in detectors}
        self.assertEqual(found, wanted)
def testQueryResultSummaries(self):
    """Test summary methods like `count`, `any`, and `explain_no_results`
    on `DataCoordinateQueryResults` and `DatasetQueryResults`.

    The test walks through queries that return rows, queries that return
    nothing for reasons detectable before execution, and queries whose
    rows are (partly or fully) removed by postprocessing.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")
    # Default test dataset has two collections, each with both flats and
    # biases.  Add a new collection with only biases.
    registry.registerCollection("biases", CollectionType.TAGGED)
    registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
    # First query yields two results, and involves no postprocessing.
    query1 = registry.queryDataIds(["physical_filter"], band="r")
    self.assertTrue(query1.any(execute=False, exact=False))
    self.assertTrue(query1.any(execute=True, exact=False))
    self.assertTrue(query1.any(execute=True, exact=True))
    self.assertEqual(query1.count(exact=False), 2)
    self.assertEqual(query1.count(exact=True), 2)
    self.assertFalse(list(query1.explain_no_results()))
    # Second query should yield no results, which we should see when
    # we attempt to expand the data ID.
    query2 = registry.queryDataIds(["physical_filter"], band="h")
    # There's no execute=False, exact=False test here because the behavior
    # is not something we want to guarantee in this case (and exact=False
    # says either answer is legal).
    self.assertFalse(query2.any(execute=True, exact=False))
    self.assertFalse(query2.any(execute=True, exact=True))
    self.assertEqual(query2.count(exact=False), 0)
    self.assertEqual(query2.count(exact=True), 0)
    # These queries yield no results due to various problems that can be
    # spotted prior to execution, yielding helpful diagnostics.
    base_query = registry.queryDataIds(["detector", "physical_filter"])
    queries_and_snippets = [
        (
            # Dataset type name doesn't match any existing dataset types.
            registry.queryDatasets("nonexistent", collections=...),
            ["nonexistent"],
        ),
        (
            # Dataset type object isn't registered.
            registry.queryDatasets(
                DatasetType(
                    "nonexistent",
                    dimensions=["instrument"],
                    universe=registry.dimensions,
                    storageClass="Image",
                ),
                collections=...,
            ),
            ["nonexistent"],
        ),
        (
            # No datasets of this type in this collection.
            registry.queryDatasets("flat", collections=["biases"]),
            ["flat", "biases"],
        ),
        (
            # No datasets of this type in this collection.
            base_query.findDatasets("flat", collections=["biases"]),
            ["flat", "biases"],
        ),
        (
            # No collections matching at all.
            registry.queryDatasets("flat", collections="potato*"),
            ["potato"],
        ),
    ]
    with self.assertRaises(MissingDatasetTypeError):
        # Dataset type name doesn't match any existing dataset types.
        list(registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...))
    with self.assertRaises(MissingDatasetTypeError):
        # Dataset type name doesn't match any existing dataset types.
        registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...).any()
    with self.assertRaises(DatasetTypeExpressionError):
        # queryDimensionRecords does not allow dataset type wildcards.
        registry.queryDimensionRecords("detector", datasets=["f*"], collections=...).any()
    for query, snippets in queries_and_snippets:
        self.assertFalse(query.any(execute=False, exact=False))
        self.assertFalse(query.any(execute=True, exact=False))
        self.assertFalse(query.any(execute=True, exact=True))
        self.assertEqual(query.count(exact=False), 0)
        self.assertEqual(query.count(exact=True), 0)
        messages = list(query.explain_no_results())
        self.assertTrue(messages)
        # Want all expected snippets to appear in at least one message.
        # Check the messages captured above rather than asking the query
        # to explain itself a second time, so the list reported on failure
        # is exactly the list that was checked.
        self.assertTrue(
            any(all(snippet in message for snippet in snippets) for message in messages),
            messages,
        )

    # Wildcards on dataset types are not permitted in queryDataIds.
    with self.assertRaises(DatasetTypeExpressionError):
        registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)

    # This query yields four overlaps in the database, but one is filtered
    # out in postprocessing.  The count queries aren't accurate because
    # they don't account for duplication that happens due to an internal
    # join against commonSkyPix.
    query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
    self.assertEqual(
        {
            DataCoordinate.standardize(
                instrument="Cam1",
                skymap="SkyMap1",
                visit=v,
                tract=t,
                universe=registry.dimensions,
            )
            for v, t in [(1, 0), (2, 0), (2, 1)]
        },
        set(query3),
    )
    self.assertTrue(query3.any(execute=False, exact=False))
    self.assertTrue(query3.any(execute=True, exact=False))
    self.assertTrue(query3.any(execute=True, exact=True))
    self.assertGreaterEqual(query3.count(exact=False), 3)
    self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
    self.assertFalse(list(query3.explain_no_results()))
    # This query yields overlaps in the database, but all are filtered
    # out in postprocessing.  The count queries again aren't very useful.
    # We have to use `where=` here to avoid an optimization that
    # (currently) skips the spatial postprocess-filtering because it
    # recognizes that no spatial join is necessary.  That's not ideal, but
    # fixing it is out of scope for this ticket.
    query4 = registry.queryDataIds(
        ["visit", "tract"],
        instrument="Cam1",
        skymap="SkyMap1",
        where="visit=1 AND detector=1 AND tract=0 AND patch=4",
    )
    self.assertFalse(set(query4))
    self.assertTrue(query4.any(execute=False, exact=False))
    self.assertTrue(query4.any(execute=True, exact=False))
    self.assertFalse(query4.any(execute=True, exact=True))
    self.assertGreaterEqual(query4.count(exact=False), 1)
    self.assertEqual(query4.count(exact=True, discard=True), 0)
    # This query should yield results from one dataset type but not the
    # other, which is not registered.
    query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
    self.assertTrue(set(query5))
    self.assertTrue(query5.any(execute=False, exact=False))
    self.assertTrue(query5.any(execute=True, exact=False))
    self.assertTrue(query5.any(execute=True, exact=True))
    self.assertGreaterEqual(query5.count(exact=False), 1)
    self.assertGreaterEqual(query5.count(exact=True), 1)
    # This query applies a selection that yields no results, fully in the
    # database.  Explaining why it fails involves traversing the relation
    # tree and running a LIMIT 1 query at each level that has the potential
    # to remove rows.
    query6 = registry.queryDimensionRecords(
        "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
    )
    self.assertEqual(query6.count(exact=True), 0)
    self.assertFalse(query6.any())
def testQueryDataIdsExpressionError(self):
    """Check that invalid ``where`` expressions given to queryDataIds are
    rejected with a recognizable error.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
    # The old and the new query systems report these problems with
    # different messages, so every pattern below lists two alternatives
    # and either of the accepted exception types may be raised.
    accepted_errors = (LookupError, InvalidQueryError)

    # An identifier that names no known dimension element.
    unknown_element_pattern = (
        r"(No dimension element with name 'foo' in 'foo\.bar'\.)|(Unrecognized identifier 'foo.bar')"
    )
    with self.assertRaisesRegex(accepted_errors, unknown_element_pattern):
        list(registry.queryDataIds(["detector"], where="foo.bar = 12"))

    # A bare ``timespan`` that is not qualified by an element name.
    unqualified_timespan_pattern = (
        "(Dimension element name cannot be inferred in this context.)"
        "|(Unrecognized identifier 'timespan')"
    )
    with self.assertRaisesRegex(accepted_errors, unqualified_timespan_pattern):
        list(registry.queryDataIds(["detector"], where="timespan.end < :time", bind=bind))
def testQueryDataIdsOrderBy(self):
    """Check ``order_by`` and ``limit`` on the results of queryDataIds(),
    including the errors raised for invalid ORDER BY terms.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
        return registry.queryDataIds(
            dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
        )

    # ``order_by`` and ``keys`` are comma-separated strings; ``result`` is
    # the expected tuple of data ID value tuples, in ``keys`` order.
    Test = namedtuple(
        "testQueryDataIdsOrderByTest",
        ("order_by", "keys", "result", "limit", "datasets", "collections"),
        defaults=(None, None, None),
    )

    # All eight (tract, detector) pairs, in ascending order.
    all_tract_detector = ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4))

    test_data = [
        Test("tract,visit", "tract,visit", ((0, 1), (0, 2), (1, 2))),
        Test("-tract,visit", "tract,visit", ((1, 2), (0, 1), (0, 2))),
        Test("tract,-visit", "tract,visit", ((0, 2), (0, 1), (1, 2))),
        Test("-tract,-visit", "tract,visit", ((1, 2), (0, 2), (0, 1))),
        Test("tract.id,visit.id", "tract,visit", ((0, 1),), limit=(1,)),
        Test("-tract,-visit", "tract,visit", ((1, 2),), limit=(1,)),
        Test("tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 1), (1, 2))),
        Test("-tract,-visit.exposure_time", "tract,visit", ((1, 2), (0, 1), (0, 2))),
        Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 2), (1, 2))),
        Test("tract,visit.name", "tract,visit", ((0, 1), (0, 2), (1, 2))),
        Test(
            "tract,-visit.timespan.begin,visit.timespan.end",
            "tract,visit",
            ((0, 2), (0, 1), (1, 2)),
        ),
        Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
        Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
        Test(
            "tract,detector",
            "tract,detector",
            all_tract_detector,
            datasets="flat",
            collections="imported_r",
        ),
        Test(
            "tract,detector.full_name",
            "tract,detector",
            all_tract_detector,
            datasets="flat",
            collections="imported_r",
        ),
        Test(
            "tract,detector.raft,detector.name_in_raft",
            "tract,detector",
            all_tract_detector,
            datasets="flat",
            collections="imported_r",
        ),
    ]

    for case in test_data:
        with self.subTest(test=repr(case)):
            order_terms = case.order_by.split(",")
            dims = case.keys.split(",")
            ordered = do_query(dims, case.datasets, case.collections).order_by(*order_terms)
            if case.limit is not None:
                ordered = ordered.limit(*case.limit)
            found = tuple(tuple(data_id[key] for key in dims) for data_id in ordered)
            self.assertEqual(found, case.result)

            # Also build the same ordered (and optionally limited) query
            # without any dataset constraint.  Only its construction is
            # exercised here; the result is never iterated.
            unconstrained = do_query(dims).order_by(*order_terms)
            if case.limit is not None:
                unconstrained = unconstrained.limit(*case.limit)

    # Invalid ORDER BY terms.  Many of these are diagnosed differently by
    # the old query system (first alternative of each pattern) and the new
    # query system (second alternative), and either exception type is
    # accepted.  Each entry is (positional arguments for do_query, terms
    # to try one at a time, expected message pattern).
    expected_errors = (ValueError, InvalidQueryError)
    error_cases = [
        ((), ("", "-"), "Empty dimension name in ORDER BY"),
        (
            (),
            ("undimension.name", "-undimension.name"),
            "(Unknown dimension element 'undimension')|(Unrecognized identifier 'undimension.name')",
        ),
        (
            (),
            ("attract", "-attract"),
            "(Metadata 'attract' cannot be found in any dimension)|(Unrecognized identifier 'attract')",
        ),
        (
            (("exposure", "visit"),),
            ("exposure_time",),
            "(Metadata 'exposure_time' exists in more than one dimension)"
            "|(Ambiguous identifier 'exposure_time' matches multiple fields)",
        ),
        (
            (("exposure", "visit"),),
            ("timespan.begin",),
            r"(Timespan exists in more than one dimension element \(day_obs, exposure, visit\); "
            r"qualify timespan with specific dimension name\.)|"
            r"(Ambiguous identifier 'timespan' matches multiple fields)",
        ),
        (
            ("tract",),
            ("timespan.begin",),
            "(Cannot find any temporal dimension element for 'timespan.begin')"
            "|(Unrecognized identifier 'timespan')",
        ),
        (
            ("tract",),
            ("tract.timespan.begin",),
            "(Cannot use 'timespan.begin' with non-temporal element)"
            "|(Unrecognized field 'timespan' for tract)",
        ),
        (
            ("tract",),
            ("tract.name",),
            "(Field 'name' does not exist in 'tract')|(Unrecognized field 'name' for tract.)",
        ),
        (
            ("visit",),
            ("timestamp.begin",),
            r"(Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?)"
            r"|(Unrecognized identifier 'timestamp.begin')",
        ),
    ]
    for query_args, bad_terms, pattern in error_cases:
        for bad_term in bad_terms:
            with self.assertRaisesRegex(expected_errors, pattern):
                list(do_query(*query_args).order_by(bad_term))
def testQueryDataIdsGovernorExceptions(self):
    """Test exceptions raised by queryDataIds() for incorrect governors.

    Every case is checked twice: once against the query itself and once
    against its materialized form.  A case either names an ``exception``
    that must be raised or gives the expected row ``count``.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
        return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)

    # The ``kwargs`` default is a single shared dict; it is only ever
    # unpacked below, never mutated.
    Test = namedtuple(
        "testQueryDataIdExceptionsTest",
        ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
        defaults=(None, None, None, {}, None, 0),
    )

    test_data = (
        Test("tract,visit", count=3),
        Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=3),
        Test("tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, count=0),
        Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=3),
        Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, count=0),
        Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=3),
        Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", count=0),
        Test(
            "tract,visit",
            where="instrument=:cam AND skymap=:map",
            bind={"cam": "Cam1", "map": "SkyMap1"},
            count=3,
        ),
        Test(
            "tract,visit",
            where="instrument=:cam AND skymap=:map",
            bind={"cam": "Cam", "map": "SkyMap"},
            count=0,
        ),
    )

    for test in test_data:
        # subTest identifies the failing case in the report (instead of
        # printing every case to stdout) and lets the remaining cases run.
        with self.subTest(test=repr(test)):
            dimensions = test.dimensions.split(",")
            if test.exception:
                with self.assertRaises(test.exception):
                    with ExitStack() as stack:
                        # DataIdValueError cases must also emit a
                        # FutureWarning.
                        if test.exception is DataIdValueError:
                            stack.enter_context(self.assertWarns(FutureWarning))
                        do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                self.assertEqual(query.count(discard=True), test.count)

            # and materialize
            if test.exception:
                with self.assertRaises(test.exception):
                    with ExitStack() as stack:
                        if test.exception is DataIdValueError:
                            stack.enter_context(self.assertWarns(FutureWarning))
                        query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                        with query.materialize() as materialized:
                            materialized.count(discard=True)
            else:
                query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
                with query.materialize() as materialized:
                    self.assertEqual(materialized.count(discard=True), test.count)
def testQueryDimensionRecordsOrderBy(self):
    """Check ``order_by`` and ``limit`` on the results of
    queryDimensionRecords(), including errors for invalid ORDER BY terms.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    def do_query(element, datasets=None, collections=None):
        return registry.queryDimensionRecords(
            element, instrument="Cam1", datasets=datasets, collections=collections
        )

    # Sanity check: four detector records are returned before any ordering.
    all_detectors = do_query("detector")
    self.assertEqual(len(list(all_detectors)), 4)

    # ``order_by`` is a comma-separated string; ``result`` is the expected
    # tuple of record IDs in order.
    Test = namedtuple(
        "testQueryDataIdsOrderByTest",
        ("element", "order_by", "result", "limit", "datasets", "collections"),
        defaults=(None, None, None),
    )

    test_data = [
        Test("detector", "detector", (1, 2, 3, 4)),
        Test("detector", "-detector", (4, 3, 2, 1)),
        Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
        Test("detector", "-detector.purpose", (4,), limit=(1,)),
        Test("visit", "visit", (1, 2)),
        Test("visit", "-visit.id", (2, 1)),
        Test("visit", "zenith_angle", (1, 2)),
        Test("visit", "-visit.name", (2, 1)),
        Test("visit", "day_obs,-visit.timespan.begin", (2, 1)),
    ]

    for case in test_data:
        order_terms = case.order_by.split(",")
        records = do_query(case.element).order_by(*order_terms)
        if case.limit is not None:
            records = records.limit(*case.limit)
        record_ids = tuple(record.id for record in records)
        self.assertEqual(record_ids, case.result)

    # Invalid ORDER BY terms.  Each entry is (element to query, terms to
    # try one at a time, expected message pattern); either exception type
    # is accepted.
    expected_errors = (ValueError, InvalidQueryError)
    error_cases = [
        (
            "detector",
            ("", "-"),
            "(Empty dimension name in ORDER BY)|(Unrecognized identifier)",
        ),
        (
            "detector",
            ("undimension.name", "-undimension.name"),
            "(Element name mismatch: 'undimension')|(Unrecognized identifier)",
        ),
        (
            "detector",
            ("attract", "-attract"),
            "(Field 'attract' does not exist in 'detector'.)|(Unrecognized identifier)",
        ),
        (
            "visit",
            ("timestamp.begin", "-timestamp.begin"),
            r"(Element name mismatch: 'timestamp' instead of 'visit'; "
            r"perhaps you meant 'timespan.begin'\?)"
            r"|(Unrecognized identifier)",
        ),
    ]
    for element, bad_terms, pattern in error_cases:
        for bad_term in bad_terms:
            with self.assertRaisesRegex(expected_errors, pattern):
                list(do_query(element).order_by(bad_term))
def testQueryDimensionRecordsExceptions(self):
    """Check queryDimensionRecords() with the instrument constraint given
    in several equivalent ways; each must find the four detector records.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    constraint_variants = (
        # No constraint at all.
        {},
        # Keyword argument.
        {"instrument": "Cam1"},
        # Data ID mapping.
        {"dataId": {"instrument": "Cam1"}},
        # Values specified in kwargs override those specified in dataId.
        {"dataId": {"instrument": "NotCam1"}, "instrument": "Cam1"},
        # Literal in the ``where`` expression.
        {"where": "instrument='Cam1'"},
    )
    for constraint in constraint_variants:
        result = registry.queryDimensionRecords("detector", **constraint)
        self.assertEqual(result.count(), 4)

    # ``where`` expression whose value is supplied through ``bind``.
    result = registry.queryDimensionRecords("detector", where="instrument=:instr", bind={"instr": "Cam1"})
    self.assertTrue(result.any())
    self.assertEqual(result.count(), 4)
def testDatasetConstrainedDimensionRecordQueries(self):
    """Check that queryDimensionRecords accepts a dataset constraint whose
    dimensions go beyond those of the requested dimension element.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")
    # Ask for physical_filter records, constrained by the "flat" dataset
    # type in a single run.
    filter_records = registry.queryDimensionRecords(
        "physical_filter",
        datasets=["flat"],
        collections="imported_r",
    )
    filter_names = {record.name for record in filter_records}
    self.assertEqual(filter_names, {"Cam1-R1", "Cam1-R2"})
    # Constraining by every dataset type (``...``) is rejected.
    with self.assertRaises(TypeError):
        list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
def testSkyPixDatasetQueries(self):
    """Check that queries involving skypix dimensions can be built when a
    dataset type that uses those dimensions is part of the query.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml")
    skypix_type = DatasetType(
        "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
    )
    registry.registerDatasetType(skypix_type)
    run_name = "r"
    registry.registerRun(run_name)
    query_dimensions = ["htm7", "instrument"]
    # With no datasets present yet, the point is merely that these queries
    # can be built and executed without raising, even when "doomed" query
    # shortcuts are in play.
    empty_data_ids = list(registry.queryDataIds(query_dimensions, datasets=skypix_type, collections=run_name))
    self.assertFalse(empty_data_ids)
    empty_refs = list(registry.queryDatasets(skypix_type, collections=run_name))
    self.assertFalse(empty_refs)
    # Insert one dataset and make sure both query types return it.
    pixelization = registry.dimensions.skypix["htm"][7].pixelization
    first_pixel = pixelization.universe()[0][0]
    data_id = registry.expandDataId(instrument="Cam1", htm7=first_pixel)
    (ref,) = registry.insertDatasets(skypix_type, [data_id], run=run_name)
    found_data_ids = set(registry.queryDataIds(query_dimensions, datasets=skypix_type, collections=run_name))
    self.assertEqual(found_data_ids, {data_id})
    found_refs = set(registry.queryDatasets(skypix_type, collections=run_name))
    self.assertEqual(found_refs, {ref})
def testDatasetIdFactory(self):
    """Smoke-test DatasetIdFactory, mainly to notice changes in its API.

    Each generation mode must produce a `uuid.UUID` of the expected UUID
    version.
    """
    butler = self.make_butler()
    registry = butler.registry
    factory = DatasetIdFactory()
    dataset_type = DatasetType(
        "datasetType",
        dimensions=["detector", "instrument"],
        universe=registry.dimensions,
        storageClass="int",
    )
    run = "run"
    data_id = DataCoordinate.standardize(
        instrument="Cam1", detector=1, dimensions=dataset_type.dimensions
    )

    # Pairs of (ID generation mode, expected UUID version).
    expected_versions = (
        (DatasetIdGenEnum.UNIQUE, 7),
        (DatasetIdGenEnum.DATAID_TYPE, 5),
        (DatasetIdGenEnum.DATAID_TYPE_RUN, 5),
    )
    for mode, version in expected_versions:
        dataset_id = factory.makeDatasetId(run, dataset_type, data_id, mode)
        self.assertIsInstance(dataset_id, uuid.UUID)
        self.assertEqual(dataset_id.version, version)
def testExposureQueries(self):
    """Check query methods with arguments sourced from the exposure log
    service.

    The most complete test dataset currently available to daf_butler tests
    is the ci_hsc-subset.yaml export, but that does not have 'exposure'
    dimension records.  The queries here, which originally used the
    exposure dimension, are therefore expressed with the (very similar)
    visit dimension instead.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "ci_hsc-subset.yaml")

    first_visits = [903334, 903336, 903338, 903342, 903344]
    first_detectors_by_name = [25, 24, 23, 22, 18]

    # First five visits, as dimension records and then as data IDs.
    visit_records = registry.queryDimensionRecords("visit", instrument="HSC").order_by("visit").limit(5)
    self.assertEqual([record.id for record in visit_records], first_visits)
    visit_data_ids = registry.queryDataIds(["visit"], instrument="HSC").order_by("visit").limit(5)
    self.assertEqual([data_id["visit"] for data_id in visit_data_ids], first_visits)

    # First five detectors ordered by full_name, again in both forms.
    detector_records = (
        registry.queryDimensionRecords("detector", instrument="HSC").order_by("full_name").limit(5)
    )
    self.assertEqual([record.id for record in detector_records], first_detectors_by_name)
    detector_data_ids = registry.queryDataIds(["detector"], instrument="HSC").order_by("full_name").limit(5)
    self.assertEqual([data_id["detector"] for data_id in detector_data_ids], first_detectors_by_name)
def test_long_query_names(self) -> None:
    """Check that queries involving very long names work correctly.

    This matters most for PostgreSQL, which truncates symbols longer than
    64 chars, but it is worth testing for all DBs.
    """
    butler = self.make_butler()
    registry = butler.registry
    long_name = "abcd" * 17
    long_named_type = DatasetType(
        long_name,
        dimensions=(),
        storageClass="Exposure",
        universe=registry.dimensions,
    )
    registry.registerDatasetType(long_named_type)
    # The search has to cover more than one collection that actually holds
    # a matching dataset; otherwise optimizations that turn findFirst=True
    # into a no-op would sidestep bugs caused by truncation.
    first_run = "run1"
    registry.registerRun(first_run)
    second_run = "run2"
    registry.registerRun(second_run)
    (first_ref,) = registry.insertDatasets(
        long_name, [DataCoordinate.make_empty(registry.dimensions)], first_run
    )
    registry.insertDatasets(long_name, [DataCoordinate.make_empty(registry.dimensions)], second_run)
    found = set(registry.queryDatasets(long_name, collections=[first_run, second_run], findFirst=True))
    self.assertEqual(found, {first_ref})
def test_skypix_constraint_queries(self) -> None:
    """Check queries that are spatially constrained by a skypix data ID."""
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml")
    # Region of every patch, keyed by (tract, patch).
    patch_regions = {
        (data_id["tract"], data_id["patch"]): data_id.region
        for data_id in registry.queryDataIds(["patch"]).expanded()
    }
    skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
    # Guard against the test becoming trivial after a config change; if
    # this fails, just pick a different HTM level.
    self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
    pixelization = skypix_dimension.pixelization
    # Collect every skypix ID that definitely overlaps at least one patch.
    relevant_skypix_ids = lsst.sphgeom.RangeSet()
    for region in patch_regions.values():
        relevant_skypix_ids |= pixelization.interior(region)
    # Search for a "nontrivial" skypix ID: one that overlaps at least one
    # patch but not every patch.
    candidate_ids = itertools.chain.from_iterable(
        range(begin, end) for begin, end in relevant_skypix_ids
    )
    for skypix_id in candidate_ids:
        skypix_region = pixelization.pixel(skypix_id)
        overlapping_patches = {
            key for key, region in patch_regions.items() if not region.isDisjointFrom(skypix_region)
        }
        if overlapping_patches and overlapping_patches != patch_regions.keys():
            break
    else:
        raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
    # A three-way join that includes the common skypix system in its
    # dimensions is expected to be rejected as an invalid query.
    with self.assertRaises(InvalidQueryError):
        three_way = registry.queryDataIds(["tract", "visit", "htm7"], skymap="SkyMap1", instrument="Cam1")
        set(three_way.expanded())
def test_spatial_constraint_queries(self) -> None:
    """Test queries in which one spatial dimension in the constraint (data
    ID or ``where`` string) constrains a different spatial dimension in the
    query result columns.

    The expected overlaps are computed independently of the query system
    by comparing the regions of every patch with the regions of every
    visit+detector combination.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml")
    # Regions keyed by (tract, patch).
    patch_regions = {
        (data_id["tract"], data_id["patch"]): data_id.region
        for data_id in registry.queryDataIds(["patch"]).expanded()
    }
    # Regions keyed by (visit, detector).
    observation_regions = {
        (data_id["visit"], data_id["detector"]): data_id.region
        for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
    }
    # Brute-force reference answer: every (patch, observation) pair whose
    # regions are not disjoint.
    overlapping_combos = {
        (patch_key, observation_key)
        for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
        if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
    }
    # Check a direct spatial join with no constraint first.
    self.assertEqual(
        {
            ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
            for data_id in registry.queryDataIds(["patch", "visit", "detector"])
        },
        overlapping_combos,
    )
    # Both key families are pairs of integers: (tract, patch) and
    # (visit, detector).
    overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
    overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
    for patch_key, observation_key in overlapping_combos:
        overlaps_by_patch[patch_key].add(observation_key)
        overlaps_by_observation[observation_key].add(patch_key)
    # Find patches and observations that overlap at least one of the other
    # but not all of the other.  ``next`` raises StopIteration if the test
    # data has no such entry.
    nontrivial_patch = next(
        patch_key
        for patch_key, observation_keys in overlaps_by_patch.items()
        if observation_keys and observation_keys != observation_regions.keys()
    )
    nontrivial_observation = next(
        observation_key
        for observation_key, patch_keys in overlaps_by_observation.items()
        if patch_keys and patch_keys != patch_regions.keys()
    )
    # Use the nontrivial patches and observations as constraints on the
    # other dimensions in various ways, first via a 'where' expression.
    # It's better in general to use 'bind' instead of f-strings, but these
    # are all integers so there are no quoting concerns.
    self.assertEqual(
        {
            (data_id["visit"], data_id["detector"])
            for data_id in registry.queryDataIds(
                ["visit", "detector"],
                where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
                skymap="SkyMap1",
            )
        },
        overlaps_by_patch[nontrivial_patch],
    )
    self.assertEqual(
        {
            (data_id["tract"], data_id["patch"])
            for data_id in registry.queryDataIds(
                ["patch"],
                where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
                instrument="Cam1",
            )
        },
        overlaps_by_observation[nontrivial_observation],
    )
    # and then via the dataId argument.
    self.assertEqual(
        {
            (data_id["visit"], data_id["detector"])
            for data_id in registry.queryDataIds(
                ["visit", "detector"],
                dataId={
                    "tract": nontrivial_patch[0],
                    "patch": nontrivial_patch[1],
                },
                skymap="SkyMap1",
            )
        },
        overlaps_by_patch[nontrivial_patch],
    )
    self.assertEqual(
        {
            (data_id["tract"], data_id["patch"])
            for data_id in registry.queryDataIds(
                ["patch"],
                dataId={
                    "visit": nontrivial_observation[0],
                    "detector": nontrivial_observation[1],
                },
                instrument="Cam1",
            )
        },
        overlaps_by_observation[nontrivial_observation],
    )
def test_query_empty_collections(self) -> None:
    """Check how registry query methods treat missing and empty
    collection arguments.

    Without an explicit ``collections`` argument the methods are expected
    to raise `NoDefaultCollectionError`; with an empty collection list
    they should return an empty result set (or `None` for ``findDataset``)
    and provide "doomed" diagnostics.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml")

    # registry.findDataset() returns a single ref or None, so it is
    # checked separately from the query methods below.
    with self.assertRaises(NoDefaultCollectionError):
        registry.findDataset("bias", instrument="Cam1", detector=1)
    found_anywhere = registry.findDataset("bias", instrument="Cam1", detector=1, collections=...)
    self.assertIsNotNone(found_anywhere)
    found_nowhere = registry.findDataset("bias", instrument="Cam1", detector=1, collections=[])
    self.assertIsNone(found_nowhere)

    # registry.queryDatasets(), registry.queryDataIds() and
    # registry.queryDimensionRecords() are all checked the same way, in
    # that order.
    query_factories = (
        lambda **kwargs: registry.queryDatasets("bias", **kwargs),
        lambda **kwargs: registry.queryDataIds("detector", datasets="bias", **kwargs),
        lambda **kwargs: registry.queryDimensionRecords("detector", datasets="bias", **kwargs),
    )
    for run_query in query_factories:
        # No collections argument and no defaults configured.
        with self.assertRaises(NoDefaultCollectionError):
            run_query()
        # Searching all collections yields at least one row.
        self.assertTrue(list(run_query(collections=...)))

        # An empty collection list yields no rows and an explanation.
        doomed = run_query(collections=[])
        self.assertEqual(len(list(doomed)), 0)
        reasons = list(doomed.explain_no_results())
        self.assertTrue(reasons)
        self.assertTrue(any("because collection list is empty" in reason for reason in reasons))
def test_dataset_followup_spatial_joins(self) -> None:
    """Check ``queryDataIds(...).findRelatedDatasets(...)`` when relating
    the data IDs to the datasets requires a spatial join.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml")
    pvi_dataset_type = DatasetType(
        "pvi", {"visit", "detector"}, storageClass="StructuredDataDict", universe=registry.dimensions
    )
    registry.registerDatasetType(pvi_dataset_type)
    run_name = "datasets"
    registry.registerRun(run_name)
    # Insert one "pvi" dataset per detector for visit 1, keyed by
    # detector ID; each insert must return exactly one ref.
    pvi_by_detector = {}
    for detector in (1, 2, 3):
        (inserted,) = registry.insertDatasets(
            pvi_dataset_type,
            [{"instrument": "Cam1", "visit": 1, "detector": detector}],
            run=run_name,
        )
        pvi_by_detector[detector] = inserted
    found = set(
        registry.queryDataIds(["patch"], skymap="SkyMap1", tract=0)
        .expanded()
        .findRelatedDatasets("pvi", [run_name])
    )
    # Expected (patch, detector) pairings for tract 0.
    expected_pairs = [
        (0, 1),
        (0, 2),
        (1, 2),
        (2, 1),
        (2, 2),
        (2, 3),
        (3, 2),
        (4, 3),
    ]
    expected = {
        (registry.expandDataId(skymap="SkyMap1", tract=0, patch=patch), pvi_by_detector[detector])
        for patch, detector in expected_pairs
    }
    self.assertEqual(found, expected)
def test_expanded_data_id_queries(self) -> None:
    """Check the basic behavior of ``expanded()`` on the results of
    ``queryDataIds`` and ``queryDatasets``.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "spatial.yaml", "datasets.yaml")

    visit_results = (
        registry.queryDataIds(["visit"], where="instrument = 'Cam1' and (visit.id = 1 or visit.id = 2)")
        .expanded()
        .order_by("visit.id")
    )
    self.assertTrue(visit_results.hasRecords())
    visits = list(visit_results)
    self.assertEqual(len(visits), 2)
    first_visit, second_visit = visits

    self.assertEqual(first_visit["visit"], 1)
    self.assertEqual(second_visit["visit"], 2)
    self.assertEqual(first_visit.records["visit"].exposure_time, 60.0)
    self.assertEqual(second_visit.records["visit"].exposure_time, 45.0)
    # physical_filter is a "cacheable" dimension, so its records are loaded
    # from local cache rather than being part of the DB rows.
    self.assertEqual(first_visit.records["physical_filter"].band, "g")
    self.assertEqual(second_visit.records["physical_filter"].band, "r")

    # Null values in dimension records must be fetched as None; insert a
    # detector whose "purpose" is null to check that.
    null_purpose_detector = {
        "instrument": "Cam1",
        "id": 5,
        "raft": "Z",
        "name_in_raft": "z",
        "full_name": "Zz",
        "purpose": None,
    }
    registry.insertDimensionData("detector", null_purpose_detector)
    detectors = list(
        registry.queryDataIds("detector", dataId={"instrument": "Cam1", "detector": 5}).expanded()
    )
    self.assertIsNone(detectors[0].records["detector"].purpose)

    datasets_query = registry.queryDatasets(
        "flat", collections="imported_g", where="instrument = 'Cam1' and detector <= 3"
    ).expanded()
    flats = sorted(datasets_query, key=lambda ref: ref.dataId["detector"])
    self.assertEqual(len(flats), 2)
    first_flat, second_flat = flats
    self.assertEqual(first_flat.id, uuid.UUID("60c8a65c-7290-4c38-b1de-e3b1cdcf872d"))
    self.assertEqual(second_flat.id, uuid.UUID("84239e7f-c41f-46d5-97b9-a27976b98ceb"))
    # All of the dimensions for flat are "cached" dimensions.
    self.assertEqual(first_flat.dataId.records["detector"].full_name, "Ab")
    self.assertEqual(second_flat.dataId.records["detector"].full_name, "Ba")
    self.assertEqual(first_flat.dataId.records["instrument"].visit_system, 1)
    # The data IDs exposed by the dataset query must carry the same
    # records as the dataset refs themselves.
    assert isinstance(datasets_query, ParentDatasetQueryResults)
    flat_data_ids = sorted(datasets_query.dataIds, key=lambda data_id: data_id["detector"])
    self.assertEqual(len(flat_data_ids), 2)
    first_data_id, second_data_id = flat_data_ids
    self.assertEqual(first_data_id.records["detector"].full_name, "Ab")
    self.assertEqual(second_data_id.records["detector"].full_name, "Ba")
    self.assertEqual(first_data_id.records["instrument"].visit_system, 1)

    # None of the datasets in the test data include any uncached
    # dimensions, so we have to set one up.
    registry.registerDatasetType(DatasetType("test", ["visit"], "int", universe=registry.dimensions))
    registry.insertDatasets("test", [{"instrument": "Cam1", "visit": 1}], run="imported_g")
    test_refs = list(registry.queryDatasets("test", collections="imported_g").expanded())
    ref = test_refs[0]
    self.assertEqual(ref.dataId.records["visit"].zenith_angle, 5.0)
    self.assertEqual(ref.dataId.records["physical_filter"].band, "g")
    expected_timespan = Timespan(
        begin=astropy.time.Time("2021-09-09 03:00:00.000000000", scale="tai"),
        end=astropy.time.Time("2021-09-09 03:01:00.000000000", scale="tai"),
    )
    self.assertEqual(ref.dataId.timespan, expected_timespan)
def test_collection_summary(self) -> None:
    """Exercise collection summaries across associate, disassociate and
    summary refresh operations.
    """
    butler = self.make_butler()
    registry = butler.registry
    self.load_data(butler, "base.yaml", "datasets.yaml", "spatial.yaml")

    # The mere existence of one more dataset type is what triggers a bug
    # in `associate` (DM-44311).
    registry.registerDatasetType(
        DatasetType("test", ["tract", "patch"], "int", universe=registry.dimensions)
    )

    calib_type_names = {"bias", "flat"}
    cam1_governors = {"instrument": {"Cam1"}}

    # Start from what has been imported.
    imported_summary = registry.getCollectionSummary("imported_g")
    self.assertEqual(imported_summary.dataset_types.names, calib_type_names)
    self.assertEqual(imported_summary.governors, cam1_governors)

    # Tag every imported dataset into a new TAGGED collection, one parent
    # dataset type at a time.
    tagged_coll = "tagged"
    registry.registerCollection(tagged_coll, CollectionType.TAGGED)
    for refs_of_type in registry.queryDatasets(..., collections=["imported_g"]).byParentDatasetType():
        registry.associate(tagged_coll, refs_of_type)

    # The tagged collection must summarize to the same dataset types.
    tagged_summary = registry.getCollectionSummary(tagged_coll)
    self.assertEqual(tagged_summary.dataset_types.names, calib_type_names)
    self.assertEqual(tagged_summary.governors, cam1_governors)

    # Empty the tagged collection again.
    tagged_refs = list(registry.queryDatasets(..., collections=[tagged_coll]))
    registry.disassociate(tagged_coll, tagged_refs)

    # Disassociating alone should leave the summaries unchanged.
    tagged_summary = registry.getCollectionSummary(tagged_coll)
    self.assertEqual(tagged_summary.dataset_types.names, calib_type_names)
    self.assertEqual(tagged_summary.governors, cam1_governors)

    # An explicit refresh cleans up the summaries.
    registry.refresh_collection_summaries()
    tagged_summary = registry.getCollectionSummary(tagged_coll)
    self.assertFalse(tagged_summary.dataset_types.names)
    # We do not clean governor summaries yet, but because how the query is
    # run, it returns empty governors when collection is missing from
    # summaries.
    self.assertFalse(tagged_summary.governors)

    # Add dataset with different governor, this is to test that governors
    # are not actually cleaned.
    skymap_refs = registry.insertDatasets(
        "test", [{"skymap": "SkyMap1", "tract": 0, "patch": 0}], "imported_g"
    )
    registry.associate(tagged_coll, skymap_refs)
    tagged_summary = registry.getCollectionSummary(tagged_coll)
    self.assertEqual(tagged_summary.dataset_types.names, {"test"})
    # Note that instrument governor resurrects here, even though there are
    # no datasets left with that governor.
    self.assertEqual(tagged_summary.governors, {"instrument": {"Cam1"}, "skymap": {"SkyMap1"}})
def test_temp_table_config(self) -> None:
    """Check that disabling ``temporary_tables`` in the registry
    configuration is reflected by the database object, and that asking for
    a temporary table then raises `ReadOnlyDatabaseError`.

    The test is skipped when the butler is not a `DirectButler`.
    """
    config = self.makeRegistryConfig()
    config["temporary_tables"] = False
    self.assertEqual(config.areTemporaryTablesAllowed, False)
    butler = self.make_butler(config)
    if not isinstance(butler, DirectButler):
        raise unittest.SkipTest("Test only makes sense for registry with direct database connection.")
    database = butler._registry._db
    self.assertEqual(database.supports_temporary_tables, False)
    # The context manager itself is expected to fail, so its body is never
    # meant to run.
    with self.assertRaisesRegex(ReadOnlyDatabaseError, "temporary tables"), database.temporary_table(...):
        pass