# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str | dict[str, str]] = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name (or
    configuration) specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
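        # In a CALIBRATION collection, a dataset is only found when the
        # query timespan overlaps the validity range it was certified with;
        # the searches below use the same timespan, so bias2 is visible
        # there.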
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict):
            if not self.datasetsManager["cls"].endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
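                # Both modes derive a deterministic version-5 (name-hash)
                # UUID from the dataset type and data ID; DATAID_TYPE_RUN
                # also mixes in the run name. That determinism is what makes
                # the re-import below yield the same ID.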

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data1", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data1" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data1", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
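            # makePacker returns a DimensionPacker that encodes this data ID
            # into a single integer; packing must round-trip through unpack,
            # and the two packers must use distinct encodings.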
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1242 def testSpatialJoin(self):
1243 """Test queries that involve spatial overlap joins."""
1244 registry = self.makeRegistry()
1245 self.loadData(registry, "hsc-rc2-subset.yaml")
1247 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1248 # the TopologicalFamily they belong to. We'll relate all elements in
1249 # each family to all of the elements in each other family.
1250 families = defaultdict(set)
1251 # Dictionary of {element.name: {dataId: region}}.
1252 regions = {}
1253 for element in registry.dimensions.getDatabaseElements():
1254 if element.spatial is not None:
1255 families[element.spatial.name].add(element)
1256 regions[element.name] = {
1257 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1258 }
1260 # If this check fails, it's not necessarily a problem - it may just be
1261 # a reasonable change to the default dimension definitions - but the
1262 # test below depends on there being more than one family to do anything
1263 # useful.
1264 self.assertEqual(len(families), 2)
1266 # Overlap DatabaseDimensionElements with each other.
1267 for family1, family2 in itertools.combinations(families, 2):
1268 for element1, element2 in itertools.product(families[family1], families[family2]):
1269 graph = DimensionGraph.union(element1.graph, element2.graph)
1270 # Construct expected set of overlapping data IDs via a
1271 # brute-force comparison of the regions we've already fetched.
1272 expected = {
1273 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
1274 for (dataId1, region1), (dataId2, region2) in itertools.product(
1275 regions[element1.name].items(), regions[element2.name].items()
1276 )
1277 if not region1.isDisjointFrom(region2)
1278 }
1279 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1280 queried = set(registry.queryDataIds(graph))
1281 self.assertEqual(expected, queried)
1283 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1284 commonSkyPix = registry.dimensions.commonSkyPix
1285 for elementName, element_regions in regions.items():
1286 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1287 expected = set()
1288 for dataId, region in element_regions.items():
1289 for begin, end in commonSkyPix.pixelization.envelope(region):
1290 expected.update(
1291 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
1292 for index in range(begin, end)
1293 )
1294 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1295 queried = set(registry.queryDataIds(graph))
1296 self.assertEqual(expected, queried)
1298 def testAbstractQuery(self):
1299 """Test that we can run a query that just lists the known
1300 bands. This is tricky because band is
1301 backed by a query against physical_filter.
1302 """
1303 registry = self.makeRegistry()
1304 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1305 registry.insertDimensionData(
1306 "physical_filter",
1307 dict(instrument="DummyCam", name="dummy_i", band="i"),
1308 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1309 dict(instrument="DummyCam", name="dummy_r", band="r"),
1310 )
1311 rows = registry.queryDataIds(["band"]).toSet()
1312 self.assertCountEqual(
1313 rows,
1314 [
1315 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1316 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1317 ],
1318 )
1320 def testAttributeManager(self):
1321 """Test basic functionality of attribute manager."""
1322 # number of attributes with schema versions in a fresh database:
1323 # 6 managers with 2 records per manager, plus the config for dimensions
1324 VERSION_COUNT = 6 * 2 + 1
1326 registry = self.makeRegistry()
1327 attributes = registry._managers.attributes
1329 # check what get() returns for a non-existent key
1330 self.assertIsNone(attributes.get("attr"))
1331 self.assertEqual(attributes.get("attr", ""), "")
1332 self.assertEqual(attributes.get("attr", "Value"), "Value")
1333 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1335 # cannot store an empty key or value
1336 with self.assertRaises(ValueError):
1337 attributes.set("", "value")
1338 with self.assertRaises(ValueError):
1339 attributes.set("attr", "")
1341 # set the value of a non-existent key
1342 attributes.set("attr", "value")
1343 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1344 self.assertEqual(attributes.get("attr"), "value")
1346 # update the value of an existing key
1347 with self.assertRaises(ButlerAttributeExistsError):
1348 attributes.set("attr", "value2")
1350 attributes.set("attr", "value2", force=True)
1351 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1352 self.assertEqual(attributes.get("attr"), "value2")
1354 # delete an existing key
1355 self.assertTrue(attributes.delete("attr"))
1356 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1358 # delete a non-existent key
1359 self.assertFalse(attributes.delete("non-attr"))
1361 # store a bunch of keys and get the list back
1362 data = [
1363 ("version.core", "1.2.3"),
1364 ("version.dimensions", "3.2.1"),
1365 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1366 ]
1367 for key, value in data:
1368 attributes.set(key, value)
1369 items = dict(attributes.items())
1370 for key, value in data:
1371 self.assertEqual(items[key], value)
1373 def testQueryDatasetsDeduplication(self):
1374 """Test that the findFirst option to queryDatasets selects datasets
1375 from collections in the order given".
1376 """
1377 registry = self.makeRegistry()
1378 self.loadData(registry, "base.yaml")
1379 self.loadData(registry, "datasets.yaml")
1380 self.assertCountEqual(
1381 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1382 [
1383 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1384 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1385 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1386 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1387 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1388 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1389 ],
1390 )
1391 self.assertCountEqual(
1392 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1393 [
1394 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1395 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1396 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1397 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1398 ],
1399 )
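# With the collection search order reversed, detectors 2-4 should now
# resolve to imported_r; detector 1 should still come from imported_g,
# because imported_r has no detector=1 bias.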
1400 self.assertCountEqual(
1401 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1402 [
1403 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1404 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1405 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1406 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1407 ],
1408 )
1410 def testQueryResults(self):
1411 """Test querying for data IDs and then manipulating the QueryResults
1412 object returned to perform other queries.
1413 """
1414 registry = self.makeRegistry()
1415 self.loadData(registry, "base.yaml")
1416 self.loadData(registry, "datasets.yaml")
1417 bias = registry.getDatasetType("bias")
1418 flat = registry.getDatasetType("flat")
1419 # Obtain expected results from methods other than those we're testing
1420 # here. That includes:
1421 # - the dimensions of the data IDs we want to query:
1422 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1423 # - the dimensions of some other data IDs we'll extract from that:
1424 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1425 # - the data IDs we expect to obtain from the first queries:
1426 expectedDataIds = DataCoordinateSet(
1427 {
1428 DataCoordinate.standardize(
1429 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1430 )
1431 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1432 },
1433 graph=expectedGraph,
1434 hasFull=False,
1435 hasRecords=False,
1436 )
1437 # - the flat datasets we expect to find from those data IDs, in just
1438 # one collection (so deduplication is irrelevant):
1439 expectedFlats = [
1440 registry.findDataset(
1441 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1442 ),
1443 registry.findDataset(
1444 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1445 ),
1446 registry.findDataset(
1447 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1448 ),
1449 ]
1450 # - the data IDs we expect to extract from that:
1451 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1452 # - the bias datasets we expect to find from those data IDs, after we
1453 # subset-out the physical_filter dimension, both with duplicates:
1454 expectedAllBiases = [
1455 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1456 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1457 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1458 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1459 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1460 ]
1461 # - ...and without duplicates:
1462 expectedDeduplicatedBiases = [
1463 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1464 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1465 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1466 ]
1467 # Test against those expected results, using a "lazy" query for the
1468 # data IDs (which re-executes that query each time we use it to do
1469 # something new).
1470 dataIds = registry.queryDataIds(
1471 ["detector", "physical_filter"],
1472 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1473 instrument="Cam1",
1474 )
1475 self.assertEqual(dataIds.graph, expectedGraph)
1476 self.assertEqual(dataIds.toSet(), expectedDataIds)
1477 self.assertCountEqual(
1478 list(
1479 dataIds.findDatasets(
1480 flat,
1481 collections=["imported_r"],
1482 )
1483 ),
1484 expectedFlats,
1485 )
1486 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1487 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1488 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1489 self.assertCountEqual(
1490 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1491 expectedAllBiases,
1492 )
1493 self.assertCountEqual(
1494 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1495 expectedDeduplicatedBiases,
1496 )
1498 # Check that a dataset type whose dimensions don't match raises ValueError.
1499 with self.assertRaises(ValueError):
1500 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True)
1502 # Use a component dataset type.
1503 self.assertCountEqual(
1504 [
1505 ref.makeComponentRef("image")
1506 for ref in subsetDataIds.findDatasets(
1507 bias,
1508 collections=["imported_r", "imported_g"],
1509 findFirst=False,
1510 )
1511 ],
1512 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1513 )
1515 # Use a dataset type name that is not registered and a dataset type
1516 # object that is not registered.
1517 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1519 # Test both string name and dataset type object.
1520 test_type: Union[str, DatasetType]
1521 for test_type, test_type_name in (
1522 (unknown_type, unknown_type.name),
1523 (unknown_type.name, unknown_type.name),
1524 ):
1525 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1526 list(
1527 subsetDataIds.findDatasets(
1528 test_type, collections=["imported_r", "imported_g"], findFirst=True
1529 )
1530 )
1532 # Materialize the bias dataset queries (only) by putting the results
1533 # into temporary tables, then repeat those tests.
1534 with subsetDataIds.findDatasets(
1535 bias, collections=["imported_r", "imported_g"], findFirst=False
1536 ).materialize() as biases:
1537 self.assertCountEqual(list(biases), expectedAllBiases)
1538 with subsetDataIds.findDatasets(
1539 bias, collections=["imported_r", "imported_g"], findFirst=True
1540 ).materialize() as biases:
1541 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1542 # Materialize the data ID subset query, but not the dataset queries.
1543 with subsetDataIds.materialize() as subsetDataIds:
1544 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1545 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1546 self.assertCountEqual(
1547 list(
1548 subsetDataIds.findDatasets(
1549 bias, collections=["imported_r", "imported_g"], findFirst=False
1550 )
1551 ),
1552 expectedAllBiases,
1553 )
1554 self.assertCountEqual(
1555 list(
1556 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1557 ),
1558 expectedDeduplicatedBiases,
1559 )
1560 # Materialize the dataset queries, too.
1561 with subsetDataIds.findDatasets(
1562 bias, collections=["imported_r", "imported_g"], findFirst=False
1563 ).materialize() as biases:
1564 self.assertCountEqual(list(biases), expectedAllBiases)
1565 with subsetDataIds.findDatasets(
1566 bias, collections=["imported_r", "imported_g"], findFirst=True
1567 ).materialize() as biases:
1568 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1569 # Materialize the original query, but none of the follow-up queries.
1570 with dataIds.materialize() as dataIds:
1571 self.assertEqual(dataIds.graph, expectedGraph)
1572 self.assertEqual(dataIds.toSet(), expectedDataIds)
1573 self.assertCountEqual(
1574 list(
1575 dataIds.findDatasets(
1576 flat,
1577 collections=["imported_r"],
1578 )
1579 ),
1580 expectedFlats,
1581 )
1582 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1583 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1584 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1585 self.assertCountEqual(
1586 list(
1587 subsetDataIds.findDatasets(
1588 bias, collections=["imported_r", "imported_g"], findFirst=False
1589 )
1590 ),
1591 expectedAllBiases,
1592 )
1593 self.assertCountEqual(
1594 list(
1595 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1596 ),
1597 expectedDeduplicatedBiases,
1598 )
1599 # Materialize just the bias dataset queries.
1600 with subsetDataIds.findDatasets(
1601 bias, collections=["imported_r", "imported_g"], findFirst=False
1602 ).materialize() as biases:
1603 self.assertCountEqual(list(biases), expectedAllBiases)
1604 with subsetDataIds.findDatasets(
1605 bias, collections=["imported_r", "imported_g"], findFirst=True
1606 ).materialize() as biases:
1607 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1608 # Materialize the subset data ID query, but not the dataset
1609 # queries.
1610 with subsetDataIds.materialize() as subsetDataIds:
1611 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1612 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1613 self.assertCountEqual(
1614 list(
1615 subsetDataIds.findDatasets(
1616 bias, collections=["imported_r", "imported_g"], findFirst=False
1617 )
1618 ),
1619 expectedAllBiases,
1620 )
1621 self.assertCountEqual(
1622 list(
1623 subsetDataIds.findDatasets(
1624 bias, collections=["imported_r", "imported_g"], findFirst=True
1625 )
1626 ),
1627 expectedDeduplicatedBiases,
1628 )
1629 # Materialize the bias dataset queries, too, so now we're
1630 # materializing every single step.
1631 with subsetDataIds.findDatasets(
1632 bias, collections=["imported_r", "imported_g"], findFirst=False
1633 ).materialize() as biases:
1634 self.assertCountEqual(list(biases), expectedAllBiases)
1635 with subsetDataIds.findDatasets(
1636 bias, collections=["imported_r", "imported_g"], findFirst=True
1637 ).materialize() as biases:
1638 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1640 def testStorageClassPropagation(self):
1641 """Test that queries for datasets respect the storage class passed in
1642 as part of a full dataset type.
1643 """
1644 registry = self.makeRegistry()
1645 self.loadData(registry, "base.yaml")
1646 dataset_type_in_registry = DatasetType(
1647 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions
1648 )
1649 registry.registerDatasetType(dataset_type_in_registry)
1650 run = "run1"
1651 registry.registerRun(run)
1652 (inserted_ref,) = registry.insertDatasets(
1653 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1654 )
1655 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1656 query_dataset_type = DatasetType(
1657 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions
1658 )
1659 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1660 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1661 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1662 (query_datasets_ref,) = query_datasets_result
1663 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1664 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1665 query_dataset_type, collections=[run]
1666 )
1667 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1668 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1669 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1670 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1671 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1672 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1673 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1675 def testEmptyDimensionsQueries(self):
1676 """Test Query and QueryResults objects in the case where there are no
1677 dimensions.
1678 """
1679 # Set up test data: one dataset type, two runs, one dataset in each.
1680 registry = self.makeRegistry()
1681 self.loadData(registry, "base.yaml")
1682 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1683 registry.registerDatasetType(schema)
1684 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1685 run1 = "run1"
1686 run2 = "run2"
1687 registry.registerRun(run1)
1688 registry.registerRun(run2)
1689 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1690 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1691 # Query directly for both of the datasets together, then for each one individually.
1692 self.checkQueryResults(
1693 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1694 )
1695 self.checkQueryResults(
1696 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1697 [dataset1],
1698 )
1699 self.checkQueryResults(
1700 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1701 [dataset2],
1702 )
1703 # Query for data IDs with no dimensions.
1704 dataIds = registry.queryDataIds([])
1705 self.checkQueryResults(dataIds, [dataId])
1706 # Use queried data IDs to find the datasets.
1707 self.checkQueryResults(
1708 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1709 [dataset1, dataset2],
1710 )
1711 self.checkQueryResults(
1712 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1713 [dataset1],
1714 )
1715 self.checkQueryResults(
1716 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1717 [dataset2],
1718 )
1719 # Now materialize the data ID query results and repeat those tests.
1720 with dataIds.materialize() as dataIds:
1721 self.checkQueryResults(dataIds, [dataId])
1722 self.checkQueryResults(
1723 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1724 [dataset1],
1725 )
1726 self.checkQueryResults(
1727 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1728 [dataset2],
1729 )
1730 # Query for non-empty data IDs, then subset that to get the empty one.
1731 # Repeat the above tests starting from that.
1732 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1733 self.checkQueryResults(dataIds, [dataId])
1734 self.checkQueryResults(
1735 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1736 [dataset1, dataset2],
1737 )
1738 self.checkQueryResults(
1739 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1740 [dataset1],
1741 )
1742 self.checkQueryResults(
1743 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1744 [dataset2],
1745 )
1746 with dataIds.materialize() as dataIds:
1747 self.checkQueryResults(dataIds, [dataId])
1748 self.checkQueryResults(
1749 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1750 [dataset1, dataset2],
1751 )
1752 self.checkQueryResults(
1753 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1754 [dataset1],
1755 )
1756 self.checkQueryResults(
1757 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1758 [dataset2],
1759 )
1760 # Query for non-empty data IDs, then materialize, then subset to get
1761 # the empty one. Repeat again.
1762 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1763 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1764 self.checkQueryResults(dataIds, [dataId])
1765 self.checkQueryResults(
1766 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1767 [dataset1, dataset2],
1768 )
1769 self.checkQueryResults(
1770 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1771 [dataset1],
1772 )
1773 self.checkQueryResults(
1774 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1775 [dataset2],
1776 )
1777 with dataIds.materialize() as dataIds:
1778 self.checkQueryResults(dataIds, [dataId])
1779 self.checkQueryResults(
1780 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1781 [dataset1, dataset2],
1782 )
1783 self.checkQueryResults(
1784 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1785 [dataset1],
1786 )
1787 self.checkQueryResults(
1788 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1789 [dataset2],
1790 )
1791 # Query for non-empty data IDs with a constraint on an empty-data-ID
1792 # dataset that exists.
1793 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1794 self.checkQueryResults(
1795 dataIds.subset(unique=True),
1796 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1797 )
1798 # Again query for non-empty data IDs with a constraint on empty-data-ID
1799 # datasets, but where the datasets don't exist. We delete the existing
1800 # dataset and query just that collection rather than creating a new
1801 # empty collection, because deletion is a bit less likely to let our
1802 # query-building logic short-circuit (via the collection summaries), and
1803 # such a shortcut would make this test a bit more trivial than we'd like.
1804 registry.removeDatasets([dataset2])
1805 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1806 self.checkQueryResults(dataIds, [])
1808 def testDimensionDataModifications(self):
1809 """Test that modifying dimension records via:
1810 syncDimensionData(..., update=True) and
1811 insertDimensionData(..., replace=True) works as expected, even in the
1812 presence of datasets using those dimensions and spatial overlap
1813 relationships.
1814 """
1816 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1817 """Unpack a sphgeom.RangeSet into the integers it contains."""
1818 for begin, end in ranges:
1819 yield from range(begin, end)
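# A quick illustrative sanity check (not part of the original test
# logic), using only calls that appear elsewhere in this test:
# RangeSet(3).scaled(4) covers the half-open index range [12, 16).
assert list(unpack_range_set(lsst.sphgeom.RangeSet(3).scaled(4))) == [12, 13, 14, 15]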
1821 def range_set_hull(
1822 ranges: lsst.sphgeom.RangeSet,
1823 pixelization: lsst.sphgeom.HtmPixelization,
1824 ) -> lsst.sphgeom.ConvexPolygon:
1825 """Create a ConvexPolygon hull of the region defined by a set of
1826 HTM pixelization index ranges.
1827 """
1828 points = []
1829 for index in unpack_range_set(ranges):
1830 points.extend(pixelization.triangle(index).getVertices())
1831 return lsst.sphgeom.ConvexPolygon(points)
1833 # Use HTM to set up an initial parent region (one arbitrary trixel)
1834 # and four child regions (the trixels within the parent at the next
1835 # level). We'll use the parent as a tract/visit region and the children
1836 # as its patch/visit_detector regions.
1837 registry = self.makeRegistry()
1838 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1839 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1840 index = 12288
1841 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1842 assert htm6.universe().contains(child_ranges_small)
1843 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1844 parent_region_small = lsst.sphgeom.ConvexPolygon(
1845 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1846 )
1847 assert all(parent_region_small.contains(c) for c in child_regions_small)
1848 # Make a larger version of each child region, defined to be the set of
1849 # htm6 trixels that overlap the original's bounding circle. Make a new
1850 # parent that's the convex hull of the new children.
1851 child_regions_large = [
1852 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1853 ]
1854 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1855 parent_region_large = lsst.sphgeom.ConvexPolygon(
1856 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1857 )
1858 assert all(parent_region_large.contains(c) for c in child_regions_large)
1859 assert parent_region_large.contains(parent_region_small)
1860 assert not parent_region_small.contains(parent_region_large)
1861 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1862 # Find some commonSkyPix indices that overlap the large regions but do
1863 # not overlap the small regions. We use commonSkyPix here to make sure
1864 # the real tests later involve what's in the database, not just
1865 # post-query filtering of regions.
1866 child_difference_indices = []
1867 for large, small in zip(child_regions_large, child_regions_small):
1868 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1869 assert difference, "if this is empty, we can't test anything useful with these regions"
1870 assert all(
1871 not commonSkyPix.triangle(d).isDisjointFrom(large)
1872 and commonSkyPix.triangle(d).isDisjointFrom(small)
1873 for d in difference
1874 )
1875 child_difference_indices.append(difference)
1876 parent_difference_indices = list(
1877 unpack_range_set(
1878 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1879 )
1880 )
1881 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1882 assert all(
1883 (
1884 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1885 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1886 )
1887 for d in parent_difference_indices
1888 )
1889 # Now that we've finally got those regions, we'll insert the large ones
1890 # as tract/patch dimension records.
1891 skymap_name = "testing_v1"
1892 registry.insertDimensionData(
1893 "skymap",
1894 {
1895 "name": skymap_name,
1896 "hash": bytes([42]),
1897 "tract_max": 1,
1898 "patch_nx_max": 2,
1899 "patch_ny_max": 2,
1900 },
1901 )
1902 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1903 registry.insertDimensionData(
1904 "patch",
1905 *[
1906 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1907 for n, c in enumerate(child_regions_large)
1908 ],
1909 )
1910 # Add a dataset that uses these dimensions to make sure that modifying
1911 # them doesn't disrupt foreign keys (need to make sure the DB doesn't
1912 # implement insert with replace=True as delete-then-insert).
1913 dataset_type = DatasetType(
1914 "coadd",
1915 dimensions=["tract", "patch"],
1916 universe=registry.dimensions,
1917 storageClass="Exposure",
1918 )
1919 registry.registerDatasetType(dataset_type)
1920 registry.registerCollection("the_run", CollectionType.RUN)
1921 registry.insertDatasets(
1922 dataset_type,
1923 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1924 run="the_run",
1925 )
1926 # Query for tracts and patches that overlap some "difference"
1927 # commonSkyPix pixels; there should be overlaps, because the database
1928 # has the "large" suite of regions.
1929 self.assertEqual(
1930 {0},
1931 {
1932 data_id["tract"]
1933 for data_id in registry.queryDataIds(
1934 ["tract"],
1935 skymap=skymap_name,
1936 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1937 )
1938 },
1939 )
1940 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1941 self.assertIn(
1942 patch_id,
1943 {
1944 data_id["patch"]
1945 for data_id in registry.queryDataIds(
1946 ["patch"],
1947 skymap=skymap_name,
1948 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1949 )
1950 },
1951 )
1952 # Use sync to update the tract region and insert to update the regions
1953 # of the patches, to the "small" suite.
1954 updated = registry.syncDimensionData(
1955 "tract",
1956 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1957 update=True,
1958 )
1959 self.assertEqual(updated, {"region": parent_region_large})
1960 registry.insertDimensionData(
1961 "patch",
1962 *[
1963 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1964 for n, c in enumerate(child_regions_small)
1965 ],
1966 replace=True,
1967 )
1968 # Query again; there now should be no such overlaps, because the
1969 # database has the "small" suite of regions.
1970 self.assertFalse(
1971 set(
1972 registry.queryDataIds(
1973 ["tract"],
1974 skymap=skymap_name,
1975 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1976 )
1977 )
1978 )
1979 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1980 self.assertNotIn(
1981 patch_id,
1982 {
1983 data_id["patch"]
1984 for data_id in registry.queryDataIds(
1985 ["patch"],
1986 skymap=skymap_name,
1987 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1988 )
1989 },
1990 )
1991 # Update back to the large regions and query one more time.
1992 updated = registry.syncDimensionData(
1993 "tract",
1994 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1995 update=True,
1996 )
1997 self.assertEqual(updated, {"region": parent_region_small})
1998 registry.insertDimensionData(
1999 "patch",
2000 *[
2001 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2002 for n, c in enumerate(child_regions_large)
2003 ],
2004 replace=True,
2005 )
2006 self.assertEqual(
2007 {0},
2008 {
2009 data_id["tract"]
2010 for data_id in registry.queryDataIds(
2011 ["tract"],
2012 skymap=skymap_name,
2013 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2014 )
2015 },
2016 )
2017 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2018 self.assertIn(
2019 patch_id,
2020 {
2021 data_id["patch"]
2022 for data_id in registry.queryDataIds(
2023 ["patch"],
2024 skymap=skymap_name,
2025 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2026 )
2027 },
2028 )
2030 def testCalibrationCollections(self):
2031 """Test operations on `~CollectionType.CALIBRATION` collections,
2032 including `Registry.certify`, `Registry.decertify`, and
2033 `Registry.findDataset`.
2034 """
2035 # Setup - make a Registry, fill it with some datasets in
2036 # non-calibration collections.
2037 registry = self.makeRegistry()
2038 self.loadData(registry, "base.yaml")
2039 self.loadData(registry, "datasets.yaml")
2040 # Set up some timestamps.
2041 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2042 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2043 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2044 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2045 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2046 allTimespans = [
2047 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2048 ]
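# The repeated None bookends above make combinations() emit
# half-unbounded spans like Timespan(None, t3) and Timespan(t3, None),
# as well as the fully unbounded Timespan(None, None).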
2049 # Get references to some datasets.
2050 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2051 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2052 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2053 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2054 # Register the main calibration collection we'll be working with.
2055 collection = "Cam1/calibs/default"
2056 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2057 # Cannot associate into a calibration collection (no timespan).
2058 with self.assertRaises(CollectionTypeError):
2059 registry.associate(collection, [bias2a])
2060 # Certify 2a dataset with [t2, t4) validity.
2061 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2062 # Test that we can query for this dataset via the new collection, both
2063 # on its own and with a RUN collection, as long as we don't try to join
2064 # in temporal dimensions or use findFirst=True.
2065 self.assertEqual(
2066 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2067 {bias2a},
2068 )
2069 self.assertEqual(
2070 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2071 {
2072 bias2a,
2073 bias2b,
2074 bias3b,
2075 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2076 },
2077 )
2078 self.assertEqual(
2079 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2080 {registry.expandDataId(instrument="Cam1", detector=2)},
2081 )
2082 self.assertEqual(
2083 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2084 {
2085 registry.expandDataId(instrument="Cam1", detector=2),
2086 registry.expandDataId(instrument="Cam1", detector=3),
2087 registry.expandDataId(instrument="Cam1", detector=4),
2088 },
2089 )
2091 # We should not be able to certify 2b with anything overlapping that
2092 # window.
2093 with self.assertRaises(ConflictingDefinitionError):
2094 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2095 with self.assertRaises(ConflictingDefinitionError):
2096 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2097 with self.assertRaises(ConflictingDefinitionError):
2098 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2099 with self.assertRaises(ConflictingDefinitionError):
2100 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2101 with self.assertRaises(ConflictingDefinitionError):
2102 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2103 with self.assertRaises(ConflictingDefinitionError):
2104 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2105 with self.assertRaises(ConflictingDefinitionError):
2106 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2107 with self.assertRaises(ConflictingDefinitionError):
2108 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2109 # We should be able to certify 3a with a range overlapping that window,
2110 # because it's for a different detector.
2111 # We'll certify 3a over [t1, t3).
2112 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2113 # Now we'll certify 2b and 3b together over [t4, ∞).
2114 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2116 # Fetch all associations and check that they are what we expect.
2117 self.assertCountEqual(
2118 list(
2119 registry.queryDatasetAssociations(
2120 "bias",
2121 collections=[collection, "imported_g", "imported_r"],
2122 )
2123 ),
2124 [
2125 DatasetAssociation(
2126 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2127 collection="imported_g",
2128 timespan=None,
2129 ),
2130 DatasetAssociation(
2131 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2132 collection="imported_r",
2133 timespan=None,
2134 ),
2135 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2136 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2137 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2138 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2139 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2140 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2141 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2142 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2143 ],
2144 )
2146 class Ambiguous:
2147 """Tag class to denote lookups that should be ambiguous."""
2149 pass
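# Using a dedicated sentinel class (rather than None) lets
# expected=None keep its meaning of "no dataset found" in assertLookup
# below.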
2151 def assertLookup(
2152 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2153 ) -> None:
2154 """Local function that asserts that a bias lookup returns the given
2155 expected result.
2156 """
2157 if expected is Ambiguous:
2158 with self.assertRaises((DatasetTypeError, LookupError)):
2159 registry.findDataset(
2160 "bias",
2161 collections=collection,
2162 instrument="Cam1",
2163 detector=detector,
2164 timespan=timespan,
2165 )
2166 else:
2167 self.assertEqual(
2168 expected,
2169 registry.findDataset(
2170 "bias",
2171 collections=collection,
2172 instrument="Cam1",
2173 detector=detector,
2174 timespan=timespan,
2175 ),
2176 )
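# At this point the collection contains bias2a certified over [t2, t4),
# bias3a over [t1, t3), and bias2b/bias3b over [t4, ∞).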
2178 # Systematically test lookups against expected results.
2179 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2180 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2181 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2182 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2183 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2184 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2185 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2186 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2187 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2188 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2189 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2190 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2191 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2192 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2193 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2194 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2195 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2196 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2197 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2198 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2199 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2200 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2201 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2202 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2203 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2204 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2205 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2206 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2207 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2208 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2209 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2210 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2211 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2212 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2213 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2214 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2215 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2216 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2217 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2218 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2219 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2220 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2222 # Decertify [t3, t5) for all data IDs, and run the test lookups again.
2223 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2224 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2225 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2226 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2227 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2228 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2229 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2230 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2231 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2232 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2233 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2234 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2235 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2236 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2237 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2238 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2239 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2240 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2241 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2242 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2243 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2244 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2245 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2246 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2247 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2248 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2249 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2250 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2251 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2252 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2253 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2254 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2255 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2256 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2257 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2258 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2259 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2260 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2261 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2262 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2263 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2264 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2265 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2266 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2267 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2269 # Decertify everything, this time with explicit data IDs, then check
2270 # that no lookups succeed.
2271 registry.decertify(
2272 collection,
2273 "bias",
2274 Timespan(None, None),
2275 dataIds=[
2276 dict(instrument="Cam1", detector=2),
2277 dict(instrument="Cam1", detector=3),
2278 ],
2279 )
2280 for detector in (2, 3):
2281 for timespan in allTimespans:
2282 assertLookup(detector=detector, timespan=timespan, expected=None)
2283 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2284 # those.
2285 registry.certify(
2286 collection,
2287 [bias2a, bias3a],
2288 Timespan(None, None),
2289 )
2290 for timespan in allTimespans:
2291 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2292 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2293 # Decertify just bias2a over [t2, t4).
2294 # This should split a single certification row into two (and leave the
2295 # other existing row, for bias3a, alone).
2296 registry.decertify(
2297 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2298 )
2299 for timespan in allTimespans:
2300 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2301 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2302 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2303 if overlapsBefore and overlapsAfter:
2304 expected = Ambiguous
2305 elif overlapsBefore or overlapsAfter:
2306 expected = bias2a
2307 else:
2308 expected = None
2309 assertLookup(detector=2, timespan=timespan, expected=expected)
2311 def testSkipCalibs(self):
2312 """Test how queries handle skipping of calibration collections."""
2313 registry = self.makeRegistry()
2314 self.loadData(registry, "base.yaml")
2315 self.loadData(registry, "datasets.yaml")
2317 coll_calib = "Cam1/calibs/default"
2318 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2320 # Add all biases to the calibration collection.
2321 # Without this, the logic that prunes dataset subqueries based on
2322 # datasetType-collection summary information will fire before the logic
2323 # we want to test below. This is a good thing (it avoids the dreaded
2324 # NotImplementedError a bit more often) everywhere but here.
2325 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2327 coll_list = [coll_calib, "imported_g", "imported_r"]
2328 chain = "Cam1/chain"
2329 registry.registerCollection(chain, type=CollectionType.CHAINED)
2330 registry.setCollectionChain(chain, coll_list)
2332 # an explicit collection list will raise if findFirst=True or if there
2333 # are temporal dimensions involved
2334 with self.assertRaises(NotImplementedError):
2335 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2336 with self.assertRaises(NotImplementedError):
2337 registry.queryDataIds(
2338 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2339 ).count()
2341 # the chained collection will skip the calibration collection
2342 datasets = list(registry.queryDatasets("bias", collections=chain))
2343 self.assertGreater(len(datasets), 0)
2345 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2346 self.assertGreater(len(dataIds), 0)
2348 # glob will skip too
2349 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2350 self.assertGreater(len(datasets), 0)
2352 # regular expression will skip too
2353 pattern = re.compile(".*")
2354 datasets = list(registry.queryDatasets("bias", collections=pattern))
2355 self.assertGreater(len(datasets), 0)
2357 # ellipsis should work as usual
2358 datasets = list(registry.queryDatasets("bias", collections=...))
2359 self.assertGreater(len(datasets), 0)
2361 # a few tests with findFirst via the chained collection
2362 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2363 self.assertGreater(len(datasets), 0)
2365 def testIngestTimeQuery(self):
"""Test where expressions that constrain the dataset ingest_date field."""
2366 registry = self.makeRegistry()
2367 self.loadData(registry, "base.yaml")
2368 dt0 = datetime.utcnow()
2369 self.loadData(registry, "datasets.yaml")
2370 dt1 = datetime.utcnow()
2372 datasets = list(registry.queryDatasets(..., collections=...))
2373 len0 = len(datasets)
2374 self.assertGreater(len0, 0)
2376 where = "ingest_date > T'2000-01-01'"
2377 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2378 len1 = len(datasets)
2379 self.assertEqual(len0, len1)
2381 # surely no one will still be using this software in 30 years
2382 where = "ingest_date > T'2050-01-01'"
2383 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2384 len2 = len(datasets)
2385 self.assertEqual(len2, 0)
2387 # Check more exact timing to make sure there is no 37-second offset
2388 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2389 # sure that we don't test with higher precision.
2390 tests = [
2391 # format: (timestamp, operator, expected_len)
2392 (dt0 - timedelta(seconds=1), ">", len0),
2393 (dt0 - timedelta(seconds=1), "<", 0),
2394 (dt1 + timedelta(seconds=1), "<", len0),
2395 (dt1 + timedelta(seconds=1), ">", 0),
2396 ]
2397 for dt, op, expect_len in tests:
2398 dt_str = dt.isoformat(sep=" ")
2400 where = f"ingest_date {op} T'{dt_str}'"
2401 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2402 self.assertEqual(len(datasets), expect_len)
2404 # same with bind using datetime or astropy Time
2405 where = f"ingest_date {op} ingest_time"
2406 datasets = list(
2407 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2408 )
2409 self.assertEqual(len(datasets), expect_len)
2411 dt_astropy = astropy.time.Time(dt, format="datetime")
2412 datasets = list(
2413 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2414 )
2415 self.assertEqual(len(datasets), expect_len)
2417 def testTimespanQueries(self):
2418 """Test query expressions involving timespans."""
2419 registry = self.makeRegistry()
2420 self.loadData(registry, "hsc-rc2-subset.yaml")
2421 # All visits in the database; mapping from visit ID to timespan.
2422 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2423 # Just those IDs, sorted (which is also temporal sorting, because HSC
2424 # visit IDs are monotonically increasing).
2425 ids = sorted(visits.keys())
2426 self.assertGreater(len(ids), 20)
2427 # Pick some quasi-random indexes into `ids` to play with.
2428 i1 = int(len(ids) * 0.1)
2429 i2 = int(len(ids) * 0.3)
2430 i3 = int(len(ids) * 0.6)
2431 i4 = int(len(ids) * 0.8)
2432 # Extract some times from those: just before the beginning of i1 (which
2433 # should be after the end of the previous visit), exactly the
2434 # beginning of i2, just after the beginning of i3 (and before its end),
2435 # and the exact end of i4.
2436 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2437 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2438 t2 = visits[ids[i2]].begin
2439 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2440 self.assertLess(t3, visits[ids[i3]].end)
2441 t4 = visits[ids[i4]].end
2442 # Make sure those are actually in order.
2443 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2445 bind = {
2446 "t1": t1,
2447 "t2": t2,
2448 "t3": t3,
2449 "t4": t4,
2450 "ts23": Timespan(t2, t3),
2451 }
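# These bound names can be referenced directly in the where expressions
# below; ts23 shows that a whole Timespan can be bound, not just
# individual times.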
2453 def query(where):
2454 """Helper function that queries for visit data IDs and returns
2455 results as a sorted, deduplicated list of visit IDs.
2456 """
2457 return sorted(
2458 {
2459 dataId["visit"]
2460 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2461 }
2462 )
2464 # Try a bunch of timespan queries, mixing up the bounds themselves,
2465 # where they appear in the expression, and how we get the timespan into
2466 # the expression.
2468 # t1 is before the start of i1, so this should not include i1.
2469 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2470 # t2 is exactly at the start of i2, but ends are exclusive, so these
2471 # should not include i2.
2472 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2473 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2474 # t3 is in the middle of i3, so this should include i3.
2475 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2476 # This one should not include i3 by the same reasoning.
2477 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2478 # t4 is exactly at the end of i4, so this should include i4.
2479 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2480 # i4's upper bound of t4 is exclusive, so this should not include i4.
2481 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2483 # Now some timespan vs. time scalar queries.
2484 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2485 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2486 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2487 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2488 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2489 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2491 # Empty timespans should not overlap anything.
2492 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2494 def testCollectionSummaries(self):
2495 """Test recording and retrieval of collection summaries."""
2496 self.maxDiff = None
2497 registry = self.makeRegistry()
2498 # Importing datasets from yaml should go through the code path where
2499 # we update collection summaries as we insert datasets.
2500 self.loadData(registry, "base.yaml")
2501 self.loadData(registry, "datasets.yaml")
2502 flat = registry.getDatasetType("flat")
2503 expected1 = CollectionSummary()
2504 expected1.dataset_types.add(registry.getDatasetType("bias"))
2505 expected1.add_data_ids(
2506 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2507 )
2508 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2509 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2510 # Create a chained collection with both of the imported runs; the
2511 # summary should be the same, because it's a union with itself.
2512 chain = "chain"
2513 registry.registerCollection(chain, CollectionType.CHAINED)
2514 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2515 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2516 # Associate flats only into a tagged collection and a calibration
2517 # collection to check summaries of those.
2518 tag = "tag"
2519 registry.registerCollection(tag, CollectionType.TAGGED)
2520 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2521 calibs = "calibs"
2522 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2523 registry.certify(
2524 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2525 )
2526 expected2 = expected1.copy()
2527 expected2.dataset_types.discard("bias")
2528 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2529 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2530 # Explicitly calling Registry.refresh() should load those same
2531 # summaries, via a totally different code path.
2532 registry.refresh()
2533 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2534 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2535 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2536 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2538 def testBindInQueryDatasets(self):
2539 """Test that the bind parameter is correctly forwarded in
2540 queryDatasets recursion.
2541 """
2542 registry = self.makeRegistry()
2545 self.loadData(registry, "base.yaml")
2546 self.loadData(registry, "datasets.yaml")
2547 self.assertEqual(
2548 set(registry.queryDatasets("flat", band="r", collections=...)),
2549 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2550 )
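# Why `bind` matters: the bound value never gets interpolated into the
# expression text, so quoting is the engine's problem, not the caller's. A
# tiny stand-in evaluator (hypothetical, not the real expression parser):
def evaluate(where: str, bind: dict[str, str], row: dict[str, str]) -> bool:
    # Supports only the "column = identifier" form used in the test above.
    lhs, rhs = (token.strip() for token in where.split("="))
    return row[lhs] == bind[rhs]

assert evaluate("band=my_band", {"my_band": "r"}, {"band": "r"})
assert not evaluate("band=my_band", {"my_band": "g"}, {"band": "r"})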
2552 def testQueryIntRangeExpressions(self):
2553 """Test integer range expressions in ``where`` arguments.
2555 Note that our expressions use inclusive stop values, unlike Python's.
2556 """
2557 registry = self.makeRegistry()
2558 self.loadData(registry, "base.yaml")
2559 self.assertEqual(
2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2562 )
2563 self.assertEqual(
2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2566 )
2567 self.assertEqual(
2568 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2569 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2570 )
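# The `1..4:2` syntax above uses an inclusive stop, unlike Python's range().
# A one-line translation (hypothetical helper, not the actual parser):
def butler_range(start: int, stop: int, stride: int = 1) -> range:
    # Add one to the stop so Python's exclusive range matches the inclusive one.
    return range(start, stop + 1, stride)

assert list(butler_range(1, 2)) == [1, 2]
assert list(butler_range(1, 4, 2)) == [1, 3]
assert list(butler_range(2, 4, 2)) == [2, 4]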
2572 def testQueryResultSummaries(self):
2573 """Test summary methods like `count`, `any`, and `explain_no_results`
2574 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2575 """
2576 registry = self.makeRegistry()
2577 self.loadData(registry, "base.yaml")
2578 self.loadData(registry, "datasets.yaml")
2579 self.loadData(registry, "spatial.yaml")
2580 # Default test dataset has two collections, each with both flats and
2581 # biases. Add a new collection with only biases.
2582 registry.registerCollection("biases", CollectionType.TAGGED)
2583 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2584 # First query yields two results, and involves no postprocessing.
2585 query1 = registry.queryDataIds(["physical_filter"], band="r")
2586 self.assertTrue(query1.any(execute=False, exact=False))
2587 self.assertTrue(query1.any(execute=True, exact=False))
2588 self.assertTrue(query1.any(execute=True, exact=True))
2589 self.assertEqual(query1.count(exact=False), 2)
2590 self.assertEqual(query1.count(exact=True), 2)
2591 self.assertFalse(list(query1.explain_no_results()))
2592 # Second query should yield no results, which we should see when
2593 # we attempt to expand the data ID.
2594 query2 = registry.queryDataIds(["physical_filter"], band="h")
2595 # There's no execute=False, exact=False test here because the behavior
2596 # is not something we want to guarantee in this case (and exact=False
2597 # says either answer is legal).
2598 self.assertFalse(query2.any(execute=True, exact=False))
2599 self.assertFalse(query2.any(execute=True, exact=True))
2600 self.assertEqual(query2.count(exact=False), 0)
2601 self.assertEqual(query2.count(exact=True), 0)
2602 self.assertTrue(list(query2.explain_no_results()))
2603 # These queries yield no results due to various problems that can be
2604 # spotted prior to execution, yielding helpful diagnostics.
2605 base_query = registry.queryDataIds(["detector", "physical_filter"])
2606 queries_and_snippets = [
2607 (
2608 # Dataset type name doesn't match any existing dataset types.
2609 registry.queryDatasets("nonexistent", collections=...),
2610 ["nonexistent"],
2611 ),
2612 (
2613 # Dataset type object isn't registered.
2614 registry.queryDatasets(
2615 DatasetType(
2616 "nonexistent",
2617 dimensions=["instrument"],
2618 universe=registry.dimensions,
2619 storageClass="Image",
2620 ),
2621 collections=...,
2622 ),
2623 ["nonexistent"],
2624 ),
2625 (
2626 # No datasets of this type in this collection.
2627 registry.queryDatasets("flat", collections=["biases"]),
2628 ["flat", "biases"],
2629 ),
2630 (
2631 # No datasets of this type in this collection.
2632 base_query.findDatasets("flat", collections=["biases"]),
2633 ["flat", "biases"],
2634 ),
2635 (
2636 # No collections matching at all.
2637 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2638 ["potato"],
2639 ),
2640 ]
2641 # The behavior of these additional queries is slated to change in the
2642 # future, so we also check for deprecation warnings.
2643 with self.assertWarns(FutureWarning):
2644 queries_and_snippets.append(
2645 (
2646 # Dataset type name doesn't match any existing dataset
2647 # types.
2648 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2649 ["nonexistent"],
2650 )
2651 )
2652 with self.assertWarns(FutureWarning):
2653 queries_and_snippets.append(
2654 (
2655 # Dataset type name doesn't match any existing dataset
2656 # types.
2657 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2658 ["nonexistent"],
2659 )
2660 )
2661 for query, snippets in queries_and_snippets:
2662 self.assertFalse(query.any(execute=False, exact=False))
2663 self.assertFalse(query.any(execute=True, exact=False))
2664 self.assertFalse(query.any(execute=True, exact=True))
2665 self.assertEqual(query.count(exact=False), 0)
2666 self.assertEqual(query.count(exact=True), 0)
2667 messages = list(query.explain_no_results())
2668 self.assertTrue(messages)
2669 # Want all expected snippets to appear in at least one message.
2670 self.assertTrue(
2671 any(
2672 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2673 ),
2674 messages,
2675 )
2677 # This query does yield results, but should also emit a warning because
2678 # passing dataset type patterns to queryDataIds is deprecated; just look
2679 # for the warning.
2680 with self.assertWarns(FutureWarning):
2681 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2683 # These queries yield no results due to problems that can be identified
2684 # by cheap follow-up queries, yielding helpful diagnostics.
2685 for query, snippets in [
2686 (
2687 # No records for one of the involved dimensions.
2688 registry.queryDataIds(["subfilter"]),
2689 ["no rows", "subfilter"],
2690 ),
2691 (
2692 # No records for one of the involved dimensions.
2693 registry.queryDimensionRecords("subfilter"),
2694 ["no rows", "subfilter"],
2695 ),
2696 ]:
2697 self.assertFalse(query.any(execute=True, exact=False))
2698 self.assertFalse(query.any(execute=True, exact=True))
2699 self.assertEqual(query.count(exact=True), 0)
2700 messages = list(query.explain_no_results())
2701 self.assertTrue(messages)
2702 # Want all expected snippets to appear in at least one message.
2703 self.assertTrue(
2704 any(
2705 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2706 ),
2707 messages,
2708 )
2710 # This query yields four overlaps in the database, but one is filtered
2711 # out in postprocessing. The count queries aren't accurate because
2712 # they don't account for duplication that happens due to an internal
2713 # join against commonSkyPix.
2714 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2715 self.assertEqual(
2716 {
2717 DataCoordinate.standardize(
2718 instrument="Cam1",
2719 skymap="SkyMap1",
2720 visit=v,
2721 tract=t,
2722 universe=registry.dimensions,
2723 )
2724 for v, t in [(1, 0), (2, 0), (2, 1)]
2725 },
2726 set(query3),
2727 )
2728 self.assertTrue(query3.any(execute=False, exact=False))
2729 self.assertTrue(query3.any(execute=True, exact=False))
2730 self.assertTrue(query3.any(execute=True, exact=True))
2731 self.assertGreaterEqual(query3.count(exact=False), 4)
2732 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2733 self.assertFalse(list(query3.explain_no_results()))
2734 # This query yields overlaps in the database, but all are filtered
2735 # out in postprocessing. The count queries again aren't very useful.
2736 # We have to use `where=` here to avoid an optimization that
2737 # (currently) skips the spatial postprocess-filtering because it
2738 # recognizes that no spatial join is necessary. That's not ideal, but
2739 # fixing it is out of scope for this ticket.
2740 query4 = registry.queryDataIds(
2741 ["visit", "tract"],
2742 instrument="Cam1",
2743 skymap="SkyMap1",
2744 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2745 )
2746 self.assertFalse(set(query4))
2747 self.assertTrue(query4.any(execute=False, exact=False))
2748 self.assertTrue(query4.any(execute=True, exact=False))
2749 self.assertFalse(query4.any(execute=True, exact=True))
2750 self.assertGreaterEqual(query4.count(exact=False), 1)
2751 self.assertEqual(query4.count(exact=True, discard=True), 0)
2752 messages = query4.explain_no_results()
2753 self.assertTrue(messages)
2754 self.assertTrue(any("overlap" in message for message in messages))
2755 # This query should yield results from one dataset type but not the
2756 # other, which is not registered.
2757 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2758 self.assertTrue(set(query5))
2759 self.assertTrue(query5.any(execute=False, exact=False))
2760 self.assertTrue(query5.any(execute=True, exact=False))
2761 self.assertTrue(query5.any(execute=True, exact=True))
2762 self.assertGreaterEqual(query5.count(exact=False), 1)
2763 self.assertGreaterEqual(query5.count(exact=True), 1)
2764 self.assertFalse(list(query5.explain_no_results()))
2765 # This query applies a selection that yields no results, fully in the
2766 # database. Explaining why it fails involves traversing the relation
2767 # tree and running a LIMIT 1 query at each level that has the potential
2768 # to remove rows.
2769 query6 = registry.queryDimensionRecords(
2770 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2771 )
2772 self.assertEqual(query6.count(exact=True), 0)
2773 messages = query6.explain_no_results()
2774 self.assertTrue(messages)
2775 self.assertTrue(any("no-purpose" in message for message in messages))
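# The contracts exercised above, restated as a sketch: `count(exact=False)`
# may overcount rows that postprocessing would remove, while `exact=True`
# must reflect the final results; `any(execute=False)` only inspects the
# query plan. `_FakeResults` is a hypothetical stand-in, not the real result
# classes.
class _FakeResults:
    def __init__(self, db_rows: list[int], keep: set[int]):
        self._db_rows = db_rows  # what SQL alone would return
        self._keep = keep        # rows that survive postprocess filtering

    def count(self, *, exact: bool, discard: bool = False) -> int:
        if not exact:
            return len(self._db_rows)  # cheap upper bound, no postprocessing
        return sum(1 for row in self._db_rows if row in self._keep)

    def any(self, *, execute: bool, exact: bool) -> bool:
        if not execute:
            return bool(self._db_rows)  # plan-only check; may be optimistic
        return self.count(exact=exact) > 0

results = _FakeResults(db_rows=[1, 2, 3, 4], keep={1, 2, 3})
assert results.count(exact=False) >= results.count(exact=True)
assert results.any(execute=True, exact=True)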
2777 def testQueryDataIdsOrderBy(self):
2778 """Test order_by and limit on result returned by queryDataIds()."""
2779 registry = self.makeRegistry()
2780 self.loadData(registry, "base.yaml")
2781 self.loadData(registry, "datasets.yaml")
2782 self.loadData(registry, "spatial.yaml")
2784 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2785 return registry.queryDataIds(
2786 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2787 )
2789 Test = namedtuple(
2790 "testQueryDataIdsOrderByTest",
2791 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2792 defaults=(None, None, None),
2793 )
2795 test_data = (
2796 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2797 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2798 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2799 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2800 Test(
2801 "tract.id,visit.id",
2802 "tract,visit",
2803 ((0, 1), (0, 1), (0, 2)),
2804 limit=(3,),
2805 ),
2806 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2807 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2808 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2809 Test(
2810 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2811 ),
2812 Test(
2813 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2814 ),
2815 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2816 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2817 Test(
2818 "tract,-timespan.begin,timespan.end",
2819 "tract,visit",
2820 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2821 ),
2822 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2823 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2824 Test(
2825 "tract,detector",
2826 "tract,detector",
2827 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2828 datasets="flat",
2829 collections="imported_r",
2830 ),
2831 Test(
2832 "tract,detector.full_name",
2833 "tract,detector",
2834 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2835 datasets="flat",
2836 collections="imported_r",
2837 ),
2838 Test(
2839 "tract,detector.raft,detector.name_in_raft",
2840 "tract,detector",
2841 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2842 datasets="flat",
2843 collections="imported_r",
2844 ),
2845 )
2847 for test in test_data:
2848 order_by = test.order_by.split(",")
2849 keys = test.keys.split(",")
2850 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2851 if test.limit is not None:
2852 query = query.limit(*test.limit)
2853 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2854 self.assertEqual(dataIds, test.result)
2856 # and materialize
2857 query = do_query(keys).order_by(*order_by)
2858 if test.limit is not None:
2859 query = query.limit(*test.limit)
2860 with self.assertRaises(RelationalAlgebraError):
2861 with query.materialize():
2862 pass
2864 # errors in a name
2865 for order_by in ("", "-"):
2866 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2867 list(do_query().order_by(order_by))
2869 for order_by in ("undimension.name", "-undimension.name"):
2870 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2871 list(do_query().order_by(order_by))
2873 for order_by in ("attract", "-attract"):
2874 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2875 list(do_query().order_by(order_by))
2877 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2878 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2880 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2881 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2883 with self.assertRaisesRegex(
2884 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2885 ):
2886 list(do_query("tract").order_by("timespan.begin"))
2888 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2889 list(do_query("tract").order_by("tract.timespan.begin"))
2891 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2892 list(do_query("tract").order_by("tract.name"))
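# The ORDER BY terms above follow a small grammar: an optional leading "-"
# for descending order, then "dimension", "dimension.field", or a bare
# metadata field resolved against the query's dimensions. A sketch of the
# term parsing only (hypothetical helper, not the registry's own parser):
def parse_order_term(term: str) -> tuple[str, bool]:
    descending = term.startswith("-")
    name = term[1:] if descending else term
    if not name:
        raise ValueError("Empty dimension name in ORDER BY")
    return name, descending

assert parse_order_term("-tract") == ("tract", True)
assert parse_order_term("visit.name") == ("visit.name", False)
assert parse_order_term("-timespan.begin") == ("timespan.begin", True)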
2894 def testQueryDataIdsGovernorExceptions(self):
2895 """Test exceptions raised by queryDataIds() for incorrect governors."""
2896 registry = self.makeRegistry()
2897 self.loadData(registry, "base.yaml")
2898 self.loadData(registry, "datasets.yaml")
2899 self.loadData(registry, "spatial.yaml")
2901 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
2902 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2904 Test = namedtuple(
2905 "testQueryDataIdExceptionsTest",
2906 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2907 defaults=(None, None, None, {}, None, 0),
2908 )
2910 test_data = (
2911 Test("tract,visit", count=6),
2912 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2913 Test(
2914 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2915 ),
2916 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2917 Test(
2918 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2919 ),
2920 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2921 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2922 Test(
2923 "tract,visit",
2924 where="instrument=cam AND skymap=map",
2925 bind={"cam": "Cam1", "map": "SkyMap1"},
2926 count=6,
2927 ),
2928 Test(
2929 "tract,visit",
2930 where="instrument=cam AND skymap=map",
2931 bind={"cam": "Cam", "map": "SkyMap"},
2932 exception=DataIdValueError,
2933 ),
2934 )
2936 for test in test_data:
2937 dimensions = test.dimensions.split(",")
2938 if test.exception:
2939 with self.assertRaises(test.exception):
2940 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2941 else:
2942 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2943 self.assertEqual(query.count(discard=True), test.count)
2945 # and materialize
2946 if test.exception:
2947 with self.assertRaises(test.exception):
2948 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2949 with query.materialize() as materialized:
2950 materialized.count(discard=True)
2951 else:
2952 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2953 with query.materialize() as materialized:
2954 self.assertEqual(materialized.count(discard=True), test.count)
2956 def testQueryDimensionRecordsOrderBy(self):
2957 """Test order_by and limit on result returned by
2958 queryDimensionRecords().
2959 """
2960 registry = self.makeRegistry()
2961 self.loadData(registry, "base.yaml")
2962 self.loadData(registry, "datasets.yaml")
2963 self.loadData(registry, "spatial.yaml")
2965 def do_query(element, datasets=None, collections=None):
2966 return registry.queryDimensionRecords(
2967 element, instrument="Cam1", datasets=datasets, collections=collections
2968 )
2970 query = do_query("detector")
2971 self.assertEqual(len(list(query)), 4)
2973 Test = namedtuple(
2974 "testQueryDataIdsOrderByTest",
2975 ("element", "order_by", "result", "limit", "datasets", "collections"),
2976 defaults=(None, None, None),
2977 )
2979 test_data = (
2980 Test("detector", "detector", (1, 2, 3, 4)),
2981 Test("detector", "-detector", (4, 3, 2, 1)),
2982 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2983 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2984 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2985 Test("visit", "visit", (1, 2)),
2986 Test("visit", "-visit.id", (2, 1)),
2987 Test("visit", "zenith_angle", (1, 2)),
2988 Test("visit", "-visit.name", (2, 1)),
2989 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2990 )
2992 for test in test_data:
2993 order_by = test.order_by.split(",")
2994 query = do_query(test.element).order_by(*order_by)
2995 if test.limit is not None:
2996 query = query.limit(*test.limit)
2997 dataIds = tuple(rec.id for rec in query)
2998 self.assertEqual(dataIds, test.result)
3000 # errors in a name
3001 for order_by in ("", "-"):
3002 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3003 list(do_query("detector").order_by(order_by))
3005 for order_by in ("undimension.name", "-undimension.name"):
3006 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3007 list(do_query("detector").order_by(order_by))
3009 for order_by in ("attract", "-attract"):
3010 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3011 list(do_query("detector").order_by(order_by))
3013 def testQueryDimensionRecordsExceptions(self):
3014 """Test exceptions raised by queryDimensionRecords()."""
3015 registry = self.makeRegistry()
3016 self.loadData(registry, "base.yaml")
3017 self.loadData(registry, "datasets.yaml")
3018 self.loadData(registry, "spatial.yaml")
3020 result = registry.queryDimensionRecords("detector")
3021 self.assertEqual(result.count(), 4)
3022 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3023 self.assertEqual(result.count(), 4)
3024 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3025 self.assertEqual(result.count(), 4)
3026 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3027 self.assertEqual(result.count(), 4)
3028 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3029 self.assertEqual(result.count(), 4)
3031 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3032 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3033 result.count()
3035 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3036 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3037 result.count()
3039 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3040 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3041 result.count()
3043 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3044 result = registry.queryDimensionRecords(
3045 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3046 )
3047 result.count()
3049 def testDatasetConstrainedDimensionRecordQueries(self):
3050 """Test that queryDimensionRecords works even when given a dataset
3051 constraint whose dimensions extend beyond the requested dimension
3052 element's.
3053 """
3054 registry = self.makeRegistry()
3055 self.loadData(registry, "base.yaml")
3056 self.loadData(registry, "datasets.yaml")
3057 # Query for physical_filter dimension records, using a dataset type
3058 # whose dimensions extend beyond physical_filter's.
3059 records = registry.queryDimensionRecords(
3060 "physical_filter",
3061 datasets=["flat"],
3062 collections="imported_r",
3063 )
3064 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3065 # Trying to constrain by all dataset types is an error.
3066 with self.assertRaises(TypeError):
3067 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3069 def testSkyPixDatasetQueries(self):
3070 """Test that we can build queries involving skypix dimensions as long
3071 as a dataset type that uses those dimensions is included.
3072 """
3073 registry = self.makeRegistry()
3074 self.loadData(registry, "base.yaml")
3075 dataset_type = DatasetType(
3076 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3077 )
3078 registry.registerDatasetType(dataset_type)
3079 run = "r"
3080 registry.registerRun(run)
3081 # First try queries where there are no datasets; the concern is whether
3082 # we can even build and execute these queries without raising, even
3083 # when "doomed" query shortcuts are in play.
3084 self.assertFalse(
3085 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3086 )
3087 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3088 # Now add a dataset and see that we can get it back.
3089 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3090 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3091 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3092 self.assertEqual(
3093 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3094 {data_id},
3095 )
3096 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3098 def testDatasetIdFactory(self):
3099 """Simple test for DatasetIdFactory, mostly to catch potential changes
3100 in its API.
3101 """
3102 registry = self.makeRegistry()
3103 factory = registry.datasetIdFactory
3104 dataset_type = DatasetType(
3105 "datasetType",
3106 dimensions=["detector", "instrument"],
3107 universe=registry.dimensions,
3108 storageClass="int",
3109 )
3110 run = "run"
3111 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
3113 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3114 self.assertIsInstance(datasetId, uuid.UUID)
3115 self.assertEqual(datasetId.version, 4)
3117 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3118 self.assertIsInstance(datasetId, uuid.UUID)
3119 self.assertEqual(datasetId.version, 5)
3121 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3122 self.assertIsInstance(datasetId, uuid.UUID)
3123 self.assertEqual(datasetId.version, 5)
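# The asserted UUID versions follow from the standard library: UNIQUE maps to
# a random uuid4(), while the DATAID_* modes derive a deterministic uuid5()
# from the inputs, so the same run/type/data ID always yields the same ID.
# Sketch only; the namespace and payload layout here are made up, not the
# factory's actual ones.
import uuid

_NS = uuid.uuid5(uuid.NAMESPACE_DNS, "butler.example.org")  # hypothetical
assert uuid.uuid4().version == 4
id_a = uuid.uuid5(_NS, "run/datasetType/Cam1/1")
id_b = uuid.uuid5(_NS, "run/datasetType/Cam1/1")
assert id_a.version == 5 and id_a == id_b  # deterministic and repeatable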
3125 def testExposureQueries(self):
3126 """Test query methods using arguments sourced from the exposure log
3127 service.
3129 The most complete test dataset currently available to daf_butler tests
3130 is the hsc-rc2-subset.yaml export (which is unfortunately distinct from
3131 the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3132 dimension records as it was focused on providing nontrivial spatial
3133 overlaps between visit+detector and tract+patch. So in this test we
3134 need to translate queries that originally used the exposure dimension
3135 to use the (very similar) visit dimension instead.
3136 """
3137 registry = self.makeRegistry()
3138 self.loadData(registry, "hsc-rc2-subset.yaml")
3139 self.assertEqual(
3140 [
3141 record.id
3142 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3143 .order_by("id")
3144 .limit(5)
3145 ],
3146 [318, 322, 326, 330, 332],
3147 )
3148 self.assertEqual(
3149 [
3150 data_id["visit"]
3151 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3152 ],
3153 [318, 322, 326, 330, 332],
3154 )
3155 self.assertEqual(
3156 [
3157 record.id
3158 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3159 .order_by("full_name")
3160 .limit(5)
3161 ],
3162 [73, 72, 71, 70, 65],
3163 )
3164 self.assertEqual(
3165 [
3166 data_id["detector"]
3167 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3168 .order_by("full_name")
3169 .limit(5)
3170 ],
3171 [73, 72, 71, 70, 65],
3172 )
3174 def test_long_query_names(self) -> None:
3175 """Test that queries involving very long names are handled correctly.
3177 This is especially important for PostgreSQL, which truncates symbols
3178 longer than 63 bytes, but it's worth testing for all DBs.
3179 """
3180 registry = self.makeRegistry()
3181 name = "abcd" * 17
3182 registry.registerDatasetType(
3183 DatasetType(
3184 name,
3185 dimensions=(),
3186 storageClass="Exposure",
3187 universe=registry.dimensions,
3188 )
3189 )
3190 # Need to search more than one collection actually containing a
3191 # matching dataset to avoid optimizations that sidestep bugs due to
3192 # truncation by making findFirst=True a no-op.
3193 run1 = "run1"
3194 registry.registerRun(run1)
3195 run2 = "run2"
3196 registry.registerRun(run2)
3197 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1)
3198 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2)
3199 self.assertEqual(
3200 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3201 {ref1},
3202 )
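# Why "abcd" * 17: PostgreSQL silently truncates identifiers to 63 bytes
# (NAMEDATALEN - 1), so two generated symbols that differ only past that
# point collide. A self-contained sketch of the hazard this test guards
# against (`pg_truncate` approximates the behavior for ASCII names):
NAMEDATALEN = 64  # PostgreSQL's compile-time default

def pg_truncate(identifier: str) -> str:
    return identifier[: NAMEDATALEN - 1]

long_name = "abcd" * 17  # 68 characters, past the limit
assert pg_truncate(long_name + "_run1") == pg_truncate(long_name + "_run2")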
3204 def test_skypix_constraint_queries(self) -> None:
3205 """Test queries spatially constrained by a skypix data ID."""
3206 registry = self.makeRegistry()
3207 self.loadData(registry, "hsc-rc2-subset.yaml")
3208 patch_regions = {
3209 (data_id["tract"], data_id["patch"]): data_id.region
3210 for data_id in registry.queryDataIds(["patch"]).expanded()
3211 }
3212 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3213 # This check ensures the test doesn't become trivial due to a config
3214 # change; if it does, just pick a different HTM level.
3215 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3216 # Gather all skypix IDs that definitely overlap at least one of these
3217 # patches.
3218 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3219 for patch_region in patch_regions.values():
3220 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
3221 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3222 # and does not overlap at least one other patch.
3223 for skypix_id in itertools.chain.from_iterable(
3224 range(begin, end) for begin, end in relevant_skypix_ids
3225 ):
3226 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3227 overlapping_patches = {
3228 patch_key
3229 for patch_key, patch_region in patch_regions.items()
3230 if not patch_region.isDisjointFrom(skypix_region)
3231 }
3232 if overlapping_patches and overlapping_patches != patch_regions.keys():
3233 break
3234 else:
3235 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3236 self.assertEqual(
3237 {
3238 (data_id["tract"], data_id["patch"])
3239 for data_id in registry.queryDataIds(
3240 ["patch"],
3241 dataId={skypix_dimension.name: skypix_id},
3242 )
3243 },
3244 overlapping_patches,
3245 )
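# The search loop above flattens a RangeSet of [begin, end) integer ranges
# into candidate pixel IDs; the same pattern with plain tuples standing in
# for lsst.sphgeom.RangeSet contents:
import itertools

ranges = [(8, 11), (20, 22)]  # each pair is a half-open [begin, end) range
pixels = list(itertools.chain.from_iterable(range(b, e) for b, e in ranges))
assert pixels == [8, 9, 10, 20, 21]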
3247 def test_spatial_constraint_queries(self) -> None:
3248 """Test queries in which one spatial dimension in the constraint (data
3249 ID or ``where`` string) constrains a different spatial dimension in the
3250 query result columns.
3251 """
3252 registry = self.makeRegistry()
3253 self.loadData(registry, "hsc-rc2-subset.yaml")
3254 patch_regions = {
3255 (data_id["tract"], data_id["patch"]): data_id.region
3256 for data_id in registry.queryDataIds(["patch"]).expanded()
3257 }
3258 observation_regions = {
3259 (data_id["visit"], data_id["detector"]): data_id.region
3260 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3261 }
3262 all_combos = {
3263 (patch_key, observation_key)
3264 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3265 }
3266 overlapping_combos = {
3267 (patch_key, observation_key)
3268 for patch_key, observation_key in all_combos
3269 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3270 }
3271 # Check a direct spatial join with no constraint first.
3272 self.assertEqual(
3273 {
3274 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3275 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3276 },
3277 overlapping_combos,
3278 )
3279 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3280 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3281 for patch_key, observation_key in overlapping_combos:
3282 overlaps_by_patch[patch_key].add(observation_key)
3283 overlaps_by_observation[observation_key].add(patch_key)
3284 # Find patches and observations that overlap at least one, but not
3285 # all, of the other kind.
3286 nontrivial_patch = next(
3287 iter(
3288 patch_key
3289 for patch_key, observation_keys in overlaps_by_patch.items()
3290 if observation_keys and observation_keys != observation_regions.keys()
3291 )
3292 )
3293 nontrivial_observation = next(
3294 iter(
3295 observation_key
3296 for observation_key, patch_keys in overlaps_by_observation.items()
3297 if patch_keys and patch_keys != patch_regions.keys()
3298 )
3299 )
3300 # Use the nontrivial patches and observations as constraints on the
3301 # other dimensions in various ways, first via a 'where' expression.
3302 # It's better in general to use 'bind' instead of f-strings, but these
3303 # are all integers so there are no quoting concerns.
3304 self.assertEqual(
3305 {
3306 (data_id["visit"], data_id["detector"])
3307 for data_id in registry.queryDataIds(
3308 ["visit", "detector"],
3309 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3310 skymap="hsc_rings_v1",
3311 )
3312 },
3313 overlaps_by_patch[nontrivial_patch],
3314 )
3315 self.assertEqual(
3316 {
3317 (data_id["tract"], data_id["patch"])
3318 for data_id in registry.queryDataIds(
3319 ["patch"],
3320 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3321 instrument="HSC",
3322 )
3323 },
3324 overlaps_by_observation[nontrivial_observation],
3325 )
3326 # and then via the dataId argument.
3327 self.assertEqual(
3328 {
3329 (data_id["visit"], data_id["detector"])
3330 for data_id in registry.queryDataIds(
3331 ["visit", "detector"],
3332 dataId={
3333 "tract": nontrivial_patch[0],
3334 "patch": nontrivial_patch[1],
3335 },
3336 skymap="hsc_rings_v1",
3337 )
3338 },
3339 overlaps_by_patch[nontrivial_patch],
3340 )
3341 self.assertEqual(
3342 {
3343 (data_id["tract"], data_id["patch"])
3344 for data_id in registry.queryDataIds(
3345 ["patch"],
3346 dataId={
3347 "visit": nontrivial_observation[0],
3348 "detector": nontrivial_observation[1],
3349 },
3350 instrument="HSC",
3351 )
3352 },
3353 overlaps_by_observation[nontrivial_observation],
3354 )
3356 def test_query_projection_drop_postprocessing(self) -> None:
3357 """Test that projections and deduplications on query objects can
3358 drop post-query region filtering to ensure the query remains in
3359 the SQL engine.
3360 """
3361 registry = self.makeRegistry()
3362 self.loadData(registry, "base.yaml")
3363 self.loadData(registry, "spatial.yaml")
3365 def pop_transfer(tree: Relation) -> Relation:
3366 """If a relation tree terminates with a transfer to a new engine,
3367 return the relation prior to that transfer. If not, return the
3368 original relation.
3369 """
3370 match tree:
3371 case Transfer(target=target):
3372 return target
3373 case _:
3374 return tree
3376 # There's no public way to get a Query object yet, so we get one from a
3377 # DataCoordinateQueryResults private attribute. When a public API is
3378 # available this test should use it.
3379 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3380 # We expect this query to terminate in the iteration engine originally,
3381 # because region-filtering is necessary.
3382 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3383 # If we deduplicate, we usually have to do that downstream of the
3384 # filtering. That means the deduplication has to happen in the
3385 # iteration engine.
3386 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3387 # If we pass drop_postprocessing, we instead drop the region filtering
3388 # so the deduplication can happen in SQL (though there might still be
3389 # a transfer to iteration at the tail of the tree that we can ignore;
3390 # that's what the pop_transfer takes care of here).
3391 self.assertIsInstance(
3392 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3393 sql.Engine,
3394 )