# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters.  Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
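
    # Subclasses exercise alternative manager implementations by overriding
    # the class members above; for example (the exact module path here is an
    # assumption for illustration; only the class name is checked by the
    # tests below):
    #
    #     class UUIDManagerRegistryTests(RegistryTests):
    #         datasetsManager = (
    #             "lsst.daf.butler.registry.datasets.byDimensions"
    #             ".ByDimensionsDatasetRecordStorageManagerUUID"
    #         )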

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
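
    # Typical usage in the tests below (one-line file summaries inferred from
    # how the tests use them):
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")      # dimension records + dataset types
    #     self.loadData(registry, "datasets.yaml")  # datasets in runs imported_g/imported_r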

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
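
    # For example, a test can check the rows, count(), and any() short-cuts of
    # a lazy query in a single call (expected_data_ids being a hypothetical
    # pre-built list of DataCoordinate objects):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected_data_ids,
    #     )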

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
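        # (A sketch of the expected behavior, not an API guarantee: the
        # registry splits such long value lists into fixed-size batches,
        # roughly 1k values per IN clause, and combines the per-batch
        # results, so no single statement exceeds the backend limit.)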
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates; the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs, they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
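
                # Version-5 UUIDs are deterministic name-based hashes, so the
                # same dataset type and data ID (plus run, for
                # DATAID_TYPE_RUN) always map to the same ID; that is what
                # makes the re-import below idempotent.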

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found in chain2 as
        # well, since run2 is searched at the front of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
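
            # Note: the two packers above compress the same expanded data ID
            # under different dimension sets, which is why each round-trips
            # to the original data ID while the two packed integers differ.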
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the requested dimensions,
        # but it is part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existent skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1238 def testSpatialJoin(self):
1239 """Test queries that involve spatial overlap joins."""
1240 registry = self.makeRegistry()
1241 self.loadData(registry, "hsc-rc2-subset.yaml")
1243 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1244 # the TopologicalFamily they belong to. We'll relate all elements in
1245 # each family to all of the elements in each other family.
1246 families = defaultdict(set)
1247 # Dictionary of {element.name: {dataId: region}}.
1248 regions = {}
1249 for element in registry.dimensions.getDatabaseElements():
1250 if element.spatial is not None:
1251 families[element.spatial.name].add(element)
1252 regions[element.name] = {
1253 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1254 }
1256 # If this check fails, it's not necessarily a problem - it may just be
1257 # a reasonable change to the default dimension definitions - but the
1258 # test below depends on there being more than one family to do anything
1259 # useful.
1260 self.assertEqual(len(families), 2)
1262 # Overlap DatabaseDimensionElements with each other.
1263 for family1, family2 in itertools.combinations(families, 2):
1264 for element1, element2 in itertools.product(families[family1], families[family2]):
1265 graph = DimensionGraph.union(element1.graph, element2.graph)
1266 # Construct expected set of overlapping data IDs via a
1267 # brute-force comparison of the regions we've already fetched.
1268 expected = {
1269 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
1270 for (dataId1, region1), (dataId2, region2) in itertools.product(
1271 regions[element1.name].items(), regions[element2.name].items()
1272 )
1273 if not region1.isDisjointFrom(region2)
1274 }
1275 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1276 queried = set(registry.queryDataIds(graph))
1277 self.assertEqual(expected, queried)
1279 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1280 commonSkyPix = registry.dimensions.commonSkyPix
1281 for elementName, elementRegions in regions.items():
1282 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1283 expected = set()
1284 for dataId, region in elementRegions.items():
1285 for begin, end in commonSkyPix.pixelization.envelope(region):
1286 expected.update(
1287 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
1288 for index in range(begin, end)
1289 )
1290 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1291 queried = set(registry.queryDataIds(graph))
1292 self.assertEqual(expected, queried)
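    # A minimal runnable sketch (not part of the suite; the circles here are
    # illustrative stand-ins for dimension regions) of the brute-force overlap
    # predicate used to build ``expected`` above: two regions overlap exactly
    # when ``isDisjointFrom`` returns False.
    def _exampleRegionOverlapPredicate(self) -> None:
        center1 = lsst.sphgeom.UnitVector3d(1.0, 0.0, 0.0)
        center2 = lsst.sphgeom.UnitVector3d(0.0, 1.0, 0.0)
        radius = lsst.sphgeom.Angle.fromDegrees(1.0)
        circle1 = lsst.sphgeom.Circle(center1, radius)
        circle2 = lsst.sphgeom.Circle(center2, radius)
        # Ninety degrees apart with one-degree radii: disjoint, no overlap.
        self.assertTrue(circle1.isDisjointFrom(circle2))
        # Any non-empty region overlaps itself.
        self.assertFalse(circle1.isDisjointFrom(circle1))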
1294 def testAbstractQuery(self):
1295 """Test that we can run a query that just lists the known
1296 bands. This is tricky because band is
1297 backed by a query against physical_filter.
1298 """
1299 registry = self.makeRegistry()
1300 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1301 registry.insertDimensionData(
1302 "physical_filter",
1303 dict(instrument="DummyCam", name="dummy_i", band="i"),
1304 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1305 dict(instrument="DummyCam", name="dummy_r", band="r"),
1306 )
1307 rows = registry.queryDataIds(["band"]).toSet()
1308 self.assertCountEqual(
1309 rows,
1310 [
1311 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1312 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1313 ],
1314 )
1316 def testAttributeManager(self):
1317 """Test basic functionality of attribute manager."""
1318 # number of attributes with schema versions in a fresh database,
1319 # 6 managers with 3 records per manager, plus config for dimensions
1320 VERSION_COUNT = 6 * 3 + 1
1322 registry = self.makeRegistry()
1323 attributes = registry._managers.attributes
1325 # check what get() returns for non-existing key
1326 self.assertIsNone(attributes.get("attr"))
1327 self.assertEqual(attributes.get("attr", ""), "")
1328 self.assertEqual(attributes.get("attr", "Value"), "Value")
1329 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1331 # cannot store empty key or value
1332 with self.assertRaises(ValueError):
1333 attributes.set("", "value")
1334 with self.assertRaises(ValueError):
1335 attributes.set("attr", "")
1337 # set value of non-existing key
1338 attributes.set("attr", "value")
1339 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1340 self.assertEqual(attributes.get("attr"), "value")
1342 # update value of existing key
1343 with self.assertRaises(ButlerAttributeExistsError):
1344 attributes.set("attr", "value2")
1346 attributes.set("attr", "value2", force=True)
1347 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1348 self.assertEqual(attributes.get("attr"), "value2")
1350 # delete existing key
1351 self.assertTrue(attributes.delete("attr"))
1352 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1354 # delete non-existing key
1355 self.assertFalse(attributes.delete("non-attr"))
1357 # store bunch of keys and get the list back
1358 data = [
1359 ("version.core", "1.2.3"),
1360 ("version.dimensions", "3.2.1"),
1361 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1362 ]
1363 for key, value in data:
1364 attributes.set(key, value)
1365 items = dict(attributes.items())
1366 for key, value in data:
1367 self.assertEqual(items[key], value)
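    # Recap of the behavior exercised above: ``set`` raises
    # ButlerAttributeExistsError for an existing key unless ``force=True``,
    # so overwrites are always explicit, and ``delete`` reports via its
    # return value whether the key existed.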
1369 def testQueryDatasetsDeduplication(self):
1370 """Test that the findFirst option to queryDatasets selects datasets
1371 from collections in the order given.
1372 """
1373 registry = self.makeRegistry()
1374 self.loadData(registry, "base.yaml")
1375 self.loadData(registry, "datasets.yaml")
1376 self.assertCountEqual(
1377 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1378 [
1379 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1380 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1381 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1382 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1383 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1384 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1385 ],
1386 )
1387 self.assertCountEqual(
1388 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1389 [
1390 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1391 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1392 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1393 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1394 ],
1395 )
1396 self.assertCountEqual(
1397 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1398 [
1399 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1400 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1401 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1402 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1403 ],
1404 )
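    # Recap of the behavior exercised above: with findFirst=True, the result
    # for each data ID comes from the first collection in the search path
    # that has a match, so reversing the collection order changes which
    # "bias" wins for detectors 2 and 3, which exist in both runs.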
1406 def testQueryResults(self):
1407 """Test querying for data IDs and then manipulating the QueryResults
1408 object returned to perform other queries.
1409 """
1410 registry = self.makeRegistry()
1411 self.loadData(registry, "base.yaml")
1412 self.loadData(registry, "datasets.yaml")
1413 bias = registry.getDatasetType("bias")
1414 flat = registry.getDatasetType("flat")
1415 # Obtain expected results from methods other than those we're testing
1416 # here. That includes:
1417 # - the dimensions of the data IDs we want to query:
1418 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1419 # - the dimensions of some other data IDs we'll extract from that:
1420 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1421 # - the data IDs we expect to obtain from the first queries:
1422 expectedDataIds = DataCoordinateSet(
1423 {
1424 DataCoordinate.standardize(
1425 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1426 )
1427 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1428 },
1429 graph=expectedGraph,
1430 hasFull=False,
1431 hasRecords=False,
1432 )
1433 # - the flat datasets we expect to find from those data IDs, in just
1434 # one collection (so deduplication is irrelevant):
1435 expectedFlats = [
1436 registry.findDataset(
1437 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1438 ),
1439 registry.findDataset(
1440 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1441 ),
1442 registry.findDataset(
1443 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1444 ),
1445 ]
1446 # - the data IDs we expect to extract from that:
1447 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1448 # - the bias datasets we expect to find from those data IDs, after we
1449 # subset-out the physical_filter dimension, both with duplicates:
1450 expectedAllBiases = [
1451 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1452 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1453 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1454 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1455 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1456 ]
1457 # - ...and without duplicates:
1458 expectedDeduplicatedBiases = [
1459 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1460 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1461 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1462 ]
1463 # Test against those expected results, using a "lazy" query for the
1464 # data IDs (which re-executes that query each time we use it to do
1465 # something new).
1466 dataIds = registry.queryDataIds(
1467 ["detector", "physical_filter"],
1468 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1469 instrument="Cam1",
1470 )
1471 self.assertEqual(dataIds.graph, expectedGraph)
1472 self.assertEqual(dataIds.toSet(), expectedDataIds)
1473 self.assertCountEqual(
1474 list(
1475 dataIds.findDatasets(
1476 flat,
1477 collections=["imported_r"],
1478 )
1479 ),
1480 expectedFlats,
1481 )
1482 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1483 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1484 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1485 self.assertCountEqual(
1486 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1487 expectedAllBiases,
1488 )
1489 self.assertCountEqual(
1490 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1491 expectedDeduplicatedBiases,
1492 )
1494 # Check dimensions match.
1495 with self.assertRaises(ValueError):
1496 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True)
1498 # Use a component dataset type.
1499 self.assertCountEqual(
1500 [
1501 ref.makeComponentRef("image")
1502 for ref in subsetDataIds.findDatasets(
1503 bias,
1504 collections=["imported_r", "imported_g"],
1505 findFirst=False,
1506 )
1507 ],
1508 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1509 )
1511 # Use a named dataset type that does not exist and a dataset type
1512 # object that does not exist.
1513 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1515 # Test both string name and dataset type object.
1516 test_type: Union[str, DatasetType]
1517 for test_type, test_type_name in (
1518 (unknown_type, unknown_type.name),
1519 (unknown_type.name, unknown_type.name),
1520 ):
1521 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1522 list(
1523 subsetDataIds.findDatasets(
1524 test_type, collections=["imported_r", "imported_g"], findFirst=True
1525 )
1526 )
1528 # Materialize the bias dataset queries (only) by putting the results
1529 # into temporary tables, then repeat those tests.
1530 with subsetDataIds.findDatasets(
1531 bias, collections=["imported_r", "imported_g"], findFirst=False
1532 ).materialize() as biases:
1533 self.assertCountEqual(list(biases), expectedAllBiases)
1534 with subsetDataIds.findDatasets(
1535 bias, collections=["imported_r", "imported_g"], findFirst=True
1536 ).materialize() as biases:
1537 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1538 # Materialize the data ID subset query, but not the dataset queries.
1539 with subsetDataIds.materialize() as subsetDataIds:
1540 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1541 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1542 self.assertCountEqual(
1543 list(
1544 subsetDataIds.findDatasets(
1545 bias, collections=["imported_r", "imported_g"], findFirst=False
1546 )
1547 ),
1548 expectedAllBiases,
1549 )
1550 self.assertCountEqual(
1551 list(
1552 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1553 ),
1554 expectedDeduplicatedBiases,
1555 )
1556 # Materialize the dataset queries, too.
1557 with subsetDataIds.findDatasets(
1558 bias, collections=["imported_r", "imported_g"], findFirst=False
1559 ).materialize() as biases:
1560 self.assertCountEqual(list(biases), expectedAllBiases)
1561 with subsetDataIds.findDatasets(
1562 bias, collections=["imported_r", "imported_g"], findFirst=True
1563 ).materialize() as biases:
1564 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1565 # Materialize the original query, but none of the follow-up queries.
1566 with dataIds.materialize() as dataIds:
1567 self.assertEqual(dataIds.graph, expectedGraph)
1568 self.assertEqual(dataIds.toSet(), expectedDataIds)
1569 self.assertCountEqual(
1570 list(
1571 dataIds.findDatasets(
1572 flat,
1573 collections=["imported_r"],
1574 )
1575 ),
1576 expectedFlats,
1577 )
1578 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1579 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1580 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1581 self.assertCountEqual(
1582 list(
1583 subsetDataIds.findDatasets(
1584 bias, collections=["imported_r", "imported_g"], findFirst=False
1585 )
1586 ),
1587 expectedAllBiases,
1588 )
1589 self.assertCountEqual(
1590 list(
1591 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1592 ),
1593 expectedDeduplicatedBiases,
1594 )
1595 # Materialize just the bias dataset queries.
1596 with subsetDataIds.findDatasets(
1597 bias, collections=["imported_r", "imported_g"], findFirst=False
1598 ).materialize() as biases:
1599 self.assertCountEqual(list(biases), expectedAllBiases)
1600 with subsetDataIds.findDatasets(
1601 bias, collections=["imported_r", "imported_g"], findFirst=True
1602 ).materialize() as biases:
1603 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1604 # Materialize the subset data ID query, but not the dataset
1605 # queries.
1606 with subsetDataIds.materialize() as subsetDataIds:
1607 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1608 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1609 self.assertCountEqual(
1610 list(
1611 subsetDataIds.findDatasets(
1612 bias, collections=["imported_r", "imported_g"], findFirst=False
1613 )
1614 ),
1615 expectedAllBiases,
1616 )
1617 self.assertCountEqual(
1618 list(
1619 subsetDataIds.findDatasets(
1620 bias, collections=["imported_r", "imported_g"], findFirst=True
1621 )
1622 ),
1623 expectedDeduplicatedBiases,
1624 )
1625 # Materialize the bias dataset queries, too, so now we're
1626 # materializing every single step.
1627 with subsetDataIds.findDatasets(
1628 bias, collections=["imported_r", "imported_g"], findFirst=False
1629 ).materialize() as biases:
1630 self.assertCountEqual(list(biases), expectedAllBiases)
1631 with subsetDataIds.findDatasets(
1632 bias, collections=["imported_r", "imported_g"], findFirst=True
1633 ).materialize() as biases:
1634 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
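    # A minimal sketch (not run by the suite, assuming a registry populated
    # like the one above) of the materialize() pattern being tested: a lazy
    # query re-executes each time it is used, while materialize() writes the
    # current results into a temporary table that follow-up queries read
    # from instead.
    def _exampleMaterializePattern(self, registry: Registry) -> None:
        dataIds = registry.queryDataIds(["detector"], instrument="Cam1")
        with dataIds.materialize() as materialized:
            # Both follow-ups reuse the temporary table rather than
            # re-running the original data ID query.
            materialized.toSet()
            materialized.findDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)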
1636 def testStorageClassPropagation(self):
1637 """Test that queries for datasets respect the storage class passed in
1638 as part of a full dataset type.
1639 """
1640 registry = self.makeRegistry()
1641 self.loadData(registry, "base.yaml")
1642 dataset_type_in_registry = DatasetType(
1643 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions
1644 )
1645 registry.registerDatasetType(dataset_type_in_registry)
1646 run = "run1"
1647 registry.registerRun(run)
1648 (inserted_ref,) = registry.insertDatasets(
1649 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1650 )
1651 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1652 query_dataset_type = DatasetType(
1653 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions
1654 )
1655 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1656 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1657 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1658 (query_datasets_ref,) = query_datasets_result
1659 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1660 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1661 query_dataset_type, collections=[run]
1662 )
1663 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1664 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1665 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1666 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1667 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1668 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1669 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1671 def testEmptyDimensionsQueries(self):
1672 """Test Query and QueryResults objects in the case where there are no
1673 dimensions.
1674 """
1675 # Set up test data: one dataset type, two runs, one dataset in each.
1676 registry = self.makeRegistry()
1677 self.loadData(registry, "base.yaml")
1678 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1679 registry.registerDatasetType(schema)
1680 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1681 run1 = "run1"
1682 run2 = "run2"
1683 registry.registerRun(run1)
1684 registry.registerRun(run2)
1685 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1686 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1687 # Query directly for both of the datasets, and each one, one at a time.
1688 self.checkQueryResults(
1689 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1690 )
1691 self.checkQueryResults(
1692 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1693 [dataset1],
1694 )
1695 self.checkQueryResults(
1696 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1697 [dataset2],
1698 )
1699 # Query for data IDs with no dimensions.
1700 dataIds = registry.queryDataIds([])
1701 self.checkQueryResults(dataIds, [dataId])
1702 # Use queried data IDs to find the datasets.
1703 self.checkQueryResults(
1704 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1705 [dataset1, dataset2],
1706 )
1707 self.checkQueryResults(
1708 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1709 [dataset1],
1710 )
1711 self.checkQueryResults(
1712 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1713 [dataset2],
1714 )
1715 # Now materialize the data ID query results and repeat those tests.
1716 with dataIds.materialize() as dataIds:
1717 self.checkQueryResults(dataIds, [dataId])
1718 self.checkQueryResults(
1719 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1720 [dataset1],
1721 )
1722 self.checkQueryResults(
1723 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1724 [dataset2],
1725 )
1726 # Query for non-empty data IDs, then subset that to get the empty one.
1727 # Repeat the above tests starting from that.
1728 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1729 self.checkQueryResults(dataIds, [dataId])
1730 self.checkQueryResults(
1731 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1732 [dataset1, dataset2],
1733 )
1734 self.checkQueryResults(
1735 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1736 [dataset1],
1737 )
1738 self.checkQueryResults(
1739 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1740 [dataset2],
1741 )
1742 with dataIds.materialize() as dataIds:
1743 self.checkQueryResults(dataIds, [dataId])
1744 self.checkQueryResults(
1745 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1746 [dataset1, dataset2],
1747 )
1748 self.checkQueryResults(
1749 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1750 [dataset1],
1751 )
1752 self.checkQueryResults(
1753 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1754 [dataset2],
1755 )
1756 # Query for non-empty data IDs, then materialize, then subset to get
1757 # the empty one. Repeat again.
1758 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1759 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1760 self.checkQueryResults(dataIds, [dataId])
1761 self.checkQueryResults(
1762 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1763 [dataset1, dataset2],
1764 )
1765 self.checkQueryResults(
1766 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1767 [dataset1],
1768 )
1769 self.checkQueryResults(
1770 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1771 [dataset2],
1772 )
1773 with dataIds.materialize() as dataIds:
1774 self.checkQueryResults(dataIds, [dataId])
1775 self.checkQueryResults(
1776 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1777 [dataset1, dataset2],
1778 )
1779 self.checkQueryResults(
1780 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1781 [dataset1],
1782 )
1783 self.checkQueryResults(
1784 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1785 [dataset2],
1786 )
1787 # Query for non-empty data IDs with a constraint on an empty-data-ID
1788 # dataset that exists.
1789 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1790 self.checkQueryResults(
1791 dataIds.subset(unique=True),
1792 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1793 )
1794 # Again query for non-empty data IDs with a constraint on empty-data-ID
1795 # datasets, but when the datasets don't exist. We delete the existing
1796 # dataset and query just that collection rather than creating a new
1797 # empty collection because this is a bit less likely for our build-time
1798 # logic to shortcut-out (via the collection summaries), and such a
1799 # shortcut would make this test a bit more trivial than we'd like.
1800 registry.removeDatasets([dataset2])
1801 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1802 self.checkQueryResults(dataIds, [])
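    # A minimal sketch (not run by the suite) of the one empty data ID that
    # the queries above resolve to: it has no dimensions at all, and every
    # registry can standardize it.
    def _exampleEmptyDataId(self, registry: Registry) -> None:
        empty = DataCoordinate.makeEmpty(registry.dimensions)
        self.assertEqual(len(empty.keys()), 0)
        self.assertEqual(empty.graph, registry.dimensions.empty)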
1804 def testDimensionDataModifications(self):
1805 """Test that modifying dimension records via:
1806 syncDimensionData(..., update=True) and
1807 insertDimensionData(..., replace=True) works as expected, even in the
1808 presence of datasets using those dimensions and spatial overlap
1809 relationships.
1810 """
1812 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1813 """Unpack a sphgeom.RangeSet into the integers it contains."""
1814 for begin, end in ranges:
1815 yield from range(begin, end)
1817 def range_set_hull(
1818 ranges: lsst.sphgeom.RangeSet,
1819 pixelization: lsst.sphgeom.HtmPixelization,
1820 ) -> lsst.sphgeom.ConvexPolygon:
1821 """Create a ConvexPolygon hull of the region defined by a set of
1822 HTM pixelization index ranges.
1823 """
1824 points = []
1825 for index in unpack_range_set(ranges):
1826 points.extend(pixelization.triangle(index).getVertices())
1827 return lsst.sphgeom.ConvexPolygon(points)
1829 # Use HTM to set up an initial parent region (one arbitrary trixel)
1830 # and four child regions (the trixels within the parent at the next
1831 # level. We'll use the parent as a tract/visit region and the children
1832 # as its patch/visit_detector regions.
1833 registry = self.makeRegistry()
1834 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1835 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1836 index = 12288
1837 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1838 assert htm6.universe().contains(child_ranges_small)
1839 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1840 parent_region_small = lsst.sphgeom.ConvexPolygon(
1841 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1842 )
1843 assert all(parent_region_small.contains(c) for c in child_regions_small)
1844 # Make a larger version of each child region, defined to be the set of
1845 # htm6 trixels that overlap the original's bounding circle. Make a new
1846 # parent that's the convex hull of the new children.
1847 child_regions_large = [
1848 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1849 ]
1850 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1851 parent_region_large = lsst.sphgeom.ConvexPolygon(
1852 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1853 )
1854 assert all(parent_region_large.contains(c) for c in child_regions_large)
1855 assert parent_region_large.contains(parent_region_small)
1856 assert not parent_region_small.contains(parent_region_large)
1857 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1858 # Find some commonSkyPix indices that overlap the large regions but not
1859 # overlap the small regions. We use commonSkyPix here to make sure the
1860 # real tests later involve what's in the database, not just post-query
1861 # filtering of regions.
1862 child_difference_indices = []
1863 for large, small in zip(child_regions_large, child_regions_small):
1864 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1865 assert difference, "if this is empty, we can't test anything useful with these regions"
1866 assert all(
1867 not commonSkyPix.triangle(d).isDisjointFrom(large)
1868 and commonSkyPix.triangle(d).isDisjointFrom(small)
1869 for d in difference
1870 )
1871 child_difference_indices.append(difference)
1872 parent_difference_indices = list(
1873 unpack_range_set(
1874 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1875 )
1876 )
1877 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1878 assert all(
1879 (
1880 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1881 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1882 )
1883 for d in parent_difference_indices
1884 )
1885 # Now that we've finally got those regions, we'll insert the large ones
1886 # as tract/patch dimension records.
1887 skymap_name = "testing_v1"
1888 registry.insertDimensionData(
1889 "skymap",
1890 {
1891 "name": skymap_name,
1892 "hash": bytes([42]),
1893 "tract_max": 1,
1894 "patch_nx_max": 2,
1895 "patch_ny_max": 2,
1896 },
1897 )
1898 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1899 registry.insertDimensionData(
1900 "patch",
1901 *[
1902 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1903 for n, c in enumerate(child_regions_large)
1904 ],
1905 )
1906 # Add a dataset that uses these dimensions to make sure that modifying
1907 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1908 # implement insert with replace=True as delete-then-insert).
1909 dataset_type = DatasetType(
1910 "coadd",
1911 dimensions=["tract", "patch"],
1912 universe=registry.dimensions,
1913 storageClass="Exposure",
1914 )
1915 registry.registerDatasetType(dataset_type)
1916 registry.registerCollection("the_run", CollectionType.RUN)
1917 registry.insertDatasets(
1918 dataset_type,
1919 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1920 run="the_run",
1921 )
1922 # Query for tracts and patches that overlap some "difference" htm9
1923 # pixels; there should be overlaps, because the database has
1924 # the "large" suite of regions.
1925 self.assertEqual(
1926 {0},
1927 {
1928 data_id["tract"]
1929 for data_id in registry.queryDataIds(
1930 ["tract"],
1931 skymap=skymap_name,
1932 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1933 )
1934 },
1935 )
1936 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1937 self.assertIn(
1938 patch_id,
1939 {
1940 data_id["patch"]
1941 for data_id in registry.queryDataIds(
1942 ["patch"],
1943 skymap=skymap_name,
1944 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1945 )
1946 },
1947 )
1948 # Use sync to update the tract region and insert to update the regions
1949 # of the patches, to the "small" suite.
1950 updated = registry.syncDimensionData(
1951 "tract",
1952 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1953 update=True,
1954 )
1955 self.assertEqual(updated, {"region": parent_region_large})
1956 registry.insertDimensionData(
1957 "patch",
1958 *[
1959 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1960 for n, c in enumerate(child_regions_small)
1961 ],
1962 replace=True,
1963 )
1964 # Query again; there now should be no such overlaps, because the
1965 # database has the "small" suite of regions.
1966 self.assertFalse(
1967 set(
1968 registry.queryDataIds(
1969 ["tract"],
1970 skymap=skymap_name,
1971 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1972 )
1973 )
1974 )
1975 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1976 self.assertNotIn(
1977 patch_id,
1978 {
1979 data_id["patch"]
1980 for data_id in registry.queryDataIds(
1981 ["patch"],
1982 skymap=skymap_name,
1983 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1984 )
1985 },
1986 )
1987 # Update back to the large regions and query one more time.
1988 updated = registry.syncDimensionData(
1989 "tract",
1990 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1991 update=True,
1992 )
1993 self.assertEqual(updated, {"region": parent_region_small})
1994 registry.insertDimensionData(
1995 "patch",
1996 *[
1997 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1998 for n, c in enumerate(child_regions_large)
1999 ],
2000 replace=True,
2001 )
2002 self.assertEqual(
2003 {0},
2004 {
2005 data_id["tract"]
2006 for data_id in registry.queryDataIds(
2007 ["tract"],
2008 skymap=skymap_name,
2009 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2010 )
2011 },
2012 )
2013 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2014 self.assertIn(
2015 patch_id,
2016 {
2017 data_id["patch"]
2018 for data_id in registry.queryDataIds(
2019 ["patch"],
2020 skymap=skymap_name,
2021 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2022 )
2023 },
2024 )
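    # Recap of the behavior exercised above: syncDimensionData(...,
    # update=True) returns a mapping from each updated field name to its
    # *previous* value (e.g. {"region": <old region>}), while
    # insertDimensionData(..., replace=True) overwrites matching records in
    # place, leaving foreign keys from existing datasets intact.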
2026 def testCalibrationCollections(self):
2027 """Test operations on `~CollectionType.CALIBRATION` collections,
2028 including `Registry.certify`, `Registry.decertify`, and
2029 `Registry.findDataset`.
2030 """
2031 # Setup - make a Registry, fill it with some datasets in
2032 # non-calibration collections.
2033 registry = self.makeRegistry()
2034 self.loadData(registry, "base.yaml")
2035 self.loadData(registry, "datasets.yaml")
2036 # Set up some timestamps.
2037 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2038 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2039 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2040 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2041 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2042 allTimespans = [
2043 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2044 ]
2045 # Get references to some datasets.
2046 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2047 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2048 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2049 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2050 # Register the main calibration collection we'll be working with.
2051 collection = "Cam1/calibs/default"
2052 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2053 # Cannot associate into a calibration collection (no timespan).
2054 with self.assertRaises(CollectionTypeError):
2055 registry.associate(collection, [bias2a])
2056 # Certify 2a dataset with [t2, t4) validity.
2057 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2058 # Test that we can query for this dataset via the new collection, both
2059 # on its own and with a RUN collection, as long as we don't try to join
2060 # in temporal dimensions or use findFirst=True.
2061 self.assertEqual(
2062 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2063 {bias2a},
2064 )
2065 self.assertEqual(
2066 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2067 {
2068 bias2a,
2069 bias2b,
2070 bias3b,
2071 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2072 },
2073 )
2074 self.assertEqual(
2075 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2076 {registry.expandDataId(instrument="Cam1", detector=2)},
2077 )
2078 self.assertEqual(
2079 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2080 {
2081 registry.expandDataId(instrument="Cam1", detector=2),
2082 registry.expandDataId(instrument="Cam1", detector=3),
2083 registry.expandDataId(instrument="Cam1", detector=4),
2084 },
2085 )
2087 # We should not be able to certify 2b with anything overlapping that
2088 # window.
2089 with self.assertRaises(ConflictingDefinitionError):
2090 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2091 with self.assertRaises(ConflictingDefinitionError):
2092 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2093 with self.assertRaises(ConflictingDefinitionError):
2094 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2095 with self.assertRaises(ConflictingDefinitionError):
2096 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2097 with self.assertRaises(ConflictingDefinitionError):
2098 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2099 with self.assertRaises(ConflictingDefinitionError):
2100 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2101 with self.assertRaises(ConflictingDefinitionError):
2102 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2103 with self.assertRaises(ConflictingDefinitionError):
2104 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2105 # We should be able to certify 3a with a range overlapping that window,
2106 # because it's for a different detector.
2107 # We'll certify 3a over [t1, t3).
2108 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2109 # Now we'll certify 2b and 3b together over [t4, ∞).
2110 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2112 # Fetch all associations and check that they are what we expect.
2113 self.assertCountEqual(
2114 list(
2115 registry.queryDatasetAssociations(
2116 "bias",
2117 collections=[collection, "imported_g", "imported_r"],
2118 )
2119 ),
2120 [
2121 DatasetAssociation(
2122 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2123 collection="imported_g",
2124 timespan=None,
2125 ),
2126 DatasetAssociation(
2127 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2128 collection="imported_r",
2129 timespan=None,
2130 ),
2131 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2132 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2133 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2134 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2135 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2136 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2137 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2138 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2139 ],
2140 )
2142 class Ambiguous:
2143 """Tag class to denote lookups that should be ambiguous."""
2145 pass
2147 def assertLookup(
2148 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2149 ) -> None:
2150 """Local function that asserts that a bias lookup returns the given
2151 expected result.
2152 """
2153 if expected is Ambiguous:
2154 with self.assertRaises((DatasetTypeError, LookupError)):
2155 registry.findDataset(
2156 "bias",
2157 collections=collection,
2158 instrument="Cam1",
2159 detector=detector,
2160 timespan=timespan,
2161 )
2162 else:
2163 self.assertEqual(
2164 expected,
2165 registry.findDataset(
2166 "bias",
2167 collections=collection,
2168 instrument="Cam1",
2169 detector=detector,
2170 timespan=timespan,
2171 ),
2172 )
2174 # Systematically test lookups against expected results.
2175 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2176 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2177 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2178 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2179 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2180 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2181 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2182 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2183 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2184 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2185 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2186 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2187 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2188 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2189 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2190 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2191 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2192 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2193 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2194 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2195 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2196 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2197 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2198 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2199 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2200 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2201 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2202 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2203 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2204 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2205 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2206 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2207 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2208 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2209 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2210 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2211 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2212 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2213 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2214 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2215 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2216 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2218 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2219 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2220 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2221 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2222 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2223 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2224 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2225 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2226 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2227 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2228 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2229 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2230 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2231 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2232 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2233 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2234 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2235 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2236 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2237 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2238 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2239 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2240 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2241 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2242 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2243 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2244 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2245 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2246 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2247 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2248 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2249 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2250 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2251 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2252 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2253 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2254 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2255 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2256 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2257 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2258 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2259 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2260 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2261 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2262 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2263 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2265 # Decertify everything, this time with explicit data IDs, then check
2266 # that no lookups succeed.
2267 registry.decertify(
2268 collection,
2269 "bias",
2270 Timespan(None, None),
2271 dataIds=[
2272 dict(instrument="Cam1", detector=2),
2273 dict(instrument="Cam1", detector=3),
2274 ],
2275 )
2276 for detector in (2, 3):
2277 for timespan in allTimespans:
2278 assertLookup(detector=detector, timespan=timespan, expected=None)
2279 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2280 # those.
2281 registry.certify(
2282 collection,
2283 [bias2a, bias3a],
2284 Timespan(None, None),
2285 )
2286 for timespan in allTimespans:
2287 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2288 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2289 # Decertify just bias2 over [t2, t4).
2290 # This should split a single certification row into two (and leave the
2291 # other existing row, for bias3a, alone).
2292 registry.decertify(
2293 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2294 )
2295 for timespan in allTimespans:
2296 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2297 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2298 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2299 if overlapsBefore and overlapsAfter:
2300 expected = Ambiguous
2301 elif overlapsBefore or overlapsAfter:
2302 expected = bias2a
2303 else:
2304 expected = None
2305 assertLookup(detector=2, timespan=timespan, expected=expected)
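    # A concrete instance of the case analysis above (using the t1..t5
    # timestamps defined earlier): after decertifying [t2, t4) for detector
    # 2, bias2a remains valid on (-inf, t2) and [t4, inf). A lookup with
    # Timespan(t1, t5) overlaps both remaining validity ranges, so it is
    # Ambiguous; Timespan(t1, t2) overlaps only the first, so it finds
    # bias2a; and Timespan(t2, t4) overlaps neither, so it finds nothing.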
2307 def testSkipCalibs(self):
2308 """Test how queries handle skipping of calibration collections."""
2309 registry = self.makeRegistry()
2310 self.loadData(registry, "base.yaml")
2311 self.loadData(registry, "datasets.yaml")
2313 coll_calib = "Cam1/calibs/default"
2314 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2316 # Add all biases to the calibration collection.
2317 # Without this, the logic that prunes dataset subqueries based on
2318 # datasetType-collection summary information will fire before the logic
2319 # we want to test below. This is a good thing (it avoids the dreaded
2320 # NotImplementedError a bit more often) everywhere but here.
2321 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2323 coll_list = [coll_calib, "imported_g", "imported_r"]
2324 chain = "Cam1/chain"
2325 registry.registerCollection(chain, type=CollectionType.CHAINED)
2326 registry.setCollectionChain(chain, coll_list)
2328 # explicit list will raise if findFirst=True or there are temporal
2329 # dimensions
2330 with self.assertRaises(NotImplementedError):
2331 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2332 with self.assertRaises(NotImplementedError):
2333 registry.queryDataIds(
2334 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2335 ).count()
2337 # chain will skip
2338 datasets = list(registry.queryDatasets("bias", collections=chain))
2339 self.assertGreater(len(datasets), 0)
2341 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2342 self.assertGreater(len(dataIds), 0)
2344 # glob will skip too
2345 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2346 self.assertGreater(len(datasets), 0)
2348 # regular expression will skip too
2349 pattern = re.compile(".*")
2350 datasets = list(registry.queryDatasets("bias", collections=pattern))
2351 self.assertGreater(len(datasets), 0)
2353 # ellipsis should work as usual
2354 datasets = list(registry.queryDatasets("bias", collections=...))
2355 self.assertGreater(len(datasets), 0)
2357 # few tests with findFirst
2358 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2359 self.assertGreater(len(datasets), 0)
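    # Recap of the behavior exercised above: an explicit collection list is
    # a promise that every named collection participates, so an unsupported
    # CALIBRATION collection raises NotImplementedError there; chained
    # collections, globs, regular expressions, and ``...`` are search
    # specifications, so unsupported collections are silently skipped.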
2361 def testIngestTimeQuery(self):
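        """Test ``where`` expressions that constrain on dataset ingest_date."""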
2362 registry = self.makeRegistry()
2363 self.loadData(registry, "base.yaml")
2364 dt0 = datetime.utcnow()
2365 self.loadData(registry, "datasets.yaml")
2366 dt1 = datetime.utcnow()
2368 datasets = list(registry.queryDatasets(..., collections=...))
2369 len0 = len(datasets)
2370 self.assertGreater(len0, 0)
2372 where = "ingest_date > T'2000-01-01'"
2373 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2374 len1 = len(datasets)
2375 self.assertEqual(len0, len1)
2377 # no one will ever use this piece of software in 30 years
2378 where = "ingest_date > T'2050-01-01'"
2379 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2380 len2 = len(datasets)
2381 self.assertEqual(len2, 0)
2383 # Check more exact timing to make sure there is no 37-second offset
2384 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2385 # sure that we don't test with higher precision.
2386 tests = [
2387 # format: (timestamp, operator, expected_len)
2388 (dt0 - timedelta(seconds=1), ">", len0),
2389 (dt0 - timedelta(seconds=1), "<", 0),
2390 (dt1 + timedelta(seconds=1), "<", len0),
2391 (dt1 + timedelta(seconds=1), ">", 0),
2392 ]
2393 for dt, op, expect_len in tests:
2394 dt_str = dt.isoformat(sep=" ")
2396 where = f"ingest_date {op} T'{dt_str}'"
2397 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2398 self.assertEqual(len(datasets), expect_len)
2400 # same with bind using datetime or astropy Time
2401 where = f"ingest_date {op} ingest_time"
2402 datasets = list(
2403 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2404 )
2405 self.assertEqual(len(datasets), expect_len)
2407 dt_astropy = astropy.time.Time(dt, format="datetime")
2408 datasets = list(
2409 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2410 )
2411 self.assertEqual(len(datasets), expect_len)
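    # A minimal sketch (not run by the suite; the date below is illustrative)
    # of where the 37-second figure above comes from: it is the TAI-UTC
    # offset for recent dates, visible directly via astropy.
    def _exampleTaiUtcOffset(self) -> None:
        t = astropy.time.Time("2022-01-01T00:00:00", scale="utc")
        self.assertEqual((t.tai.datetime - t.utc.datetime).total_seconds(), 37.0)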
2413 def testTimespanQueries(self):
2414 """Test query expressions involving timespans."""
2415 registry = self.makeRegistry()
2416 self.loadData(registry, "hsc-rc2-subset.yaml")
2418 # All visits in the database; mapping from visit ID to timespan.
2418 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2419 # Just those IDs, sorted (which is also temporal sorting, because HSC
2420 # visit IDs are monotonically increasing).
2421 ids = sorted(visits.keys())
2422 self.assertGreater(len(ids), 20)
2423 # Pick some quasi-random indexes into `ids` to play with.
2424 i1 = int(len(ids) * 0.1)
2425 i2 = int(len(ids) * 0.3)
2426 i3 = int(len(ids) * 0.6)
2427 i4 = int(len(ids) * 0.8)
2428 # Extract some times from those: just before the beginning of i1 (which
2429 # should be after the end of the preceding visit), exactly the
2430 # beginning of i2, just after the beginning of i3 (and before its end),
2431 # and the exact end of i4.
2432 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2433 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2434 t2 = visits[ids[i2]].begin
2435 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2436 self.assertLess(t3, visits[ids[i3]].end)
2437 t4 = visits[ids[i4]].end
2438 # Make sure those are actually in order.
2439 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2441 bind = {
2442 "t1": t1,
2443 "t2": t2,
2444 "t3": t3,
2445 "t4": t4,
2446 "ts23": Timespan(t2, t3),
2447 }
2449 def query(where):
2450 """Helper function that queries for visit data IDs and returns
2451 results as a sorted, deduplicated list of visit IDs.
2452 """
2453 return sorted(
2454 {
2455 dataId["visit"]
2456 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2457 }
2458 )
2460 # Try a bunch of timespan queries, mixing up the bounds themselves,
2461 # where they appear in the expression, and how we get the timespan into
2462 # the expression.
2464 # t1 is before the start of i1, so this should not include i1.
2465 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2466 # t2 is exactly at the start of i2, but ends are exclusive, so these
2467 # should not include i2.
2468 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2469 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2470 # t3 is in the middle of i3, so this should include i3.
2471 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2472 # This one should not include i3, by the same reasoning.
2473 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2474 # t4 is exactly at the end of i4, so this should include i4.
2475 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2476 # i4's upper bound is t4, which is exclusive, so this should not include i4.
2477 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2479 # Now some timespan vs. time scalar queries.
2480 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2481 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2482 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2483 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2484 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2485 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2487 # Empty timespans should not overlap anything.
2488 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
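    # A minimal sketch (not run by the suite) of the begin-inclusive,
    # end-exclusive Timespan semantics relied on above.
    def _exampleTimespanSemantics(self) -> None:
        tA = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        tB = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        # Adjacent timespans share only the boundary instant, which belongs
        # to the later one, so they do not overlap.
        self.assertFalse(Timespan(tA, tB).overlaps(Timespan(tB, None)))
        # An empty timespan overlaps nothing, not even (-inf, inf).
        self.assertFalse(Timespan.makeEmpty().overlaps(Timespan(None, None)))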
2490 def testCollectionSummaries(self):
2491 """Test recording and retrieval of collection summaries."""
2492 self.maxDiff = None
2493 registry = self.makeRegistry()
2494 # Importing datasets from yaml should go through the code path where
2495 # we update collection summaries as we insert datasets.
2496 self.loadData(registry, "base.yaml")
2497 self.loadData(registry, "datasets.yaml")
2498 flat = registry.getDatasetType("flat")
2499 expected1 = CollectionSummary()
2500 expected1.dataset_types.add(registry.getDatasetType("bias"))
2501 expected1.add_data_ids(
2502 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2503 )
2504 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2505 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2506 # Create a chained collection with both of the imported runs; the
2507 # summary should be the same, because it's a union with itself.
2508 chain = "chain"
2509 registry.registerCollection(chain, CollectionType.CHAINED)
2510 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2511 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2512 # Associate flats only into a tagged collection and a calibration
2513 # collection to check summaries of those.
2514 tag = "tag"
2515 registry.registerCollection(tag, CollectionType.TAGGED)
2516 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2517 calibs = "calibs"
2518 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2519 registry.certify(
2520 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2521 )
2522 expected2 = expected1.copy()
2523 expected2.dataset_types.discard("bias")
2524 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2525 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2526 # Explicitly calling Registry.refresh() should load those same
2527 # summaries, via a totally different code path.
2528 registry.refresh()
2529 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2530 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2531 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2532 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
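    # Recap of what the summaries above record: a conservative superset of
    # the dataset types and governor dimension values a collection can
    # contain, which is what lets query code skip collections that cannot
    # possibly match a given dataset search.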
2534 def testBindInQueryDatasets(self):
2535 """Test that the bind parameter is correctly forwarded in
2536 queryDatasets recursion.
2537 """
2538 registry = self.makeRegistry()
2539 # Importing datasets from yaml should go through the code path where
2540 # we update collection summaries as we insert datasets.
2541 self.loadData(registry, "base.yaml")
2542 self.loadData(registry, "datasets.yaml")
2543 self.assertEqual(
2544 set(registry.queryDatasets("flat", band="r", collections=...)),
2545 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2546 )
2548 def testQueryIntRangeExpressions(self):
2549 """Test integer range expressions in ``where`` arguments.
2551 Note that our expressions use inclusive stop values, unlike Python's.
2552 """
2553 registry = self.makeRegistry()
2554 self.loadData(registry, "base.yaml")
2555 self.assertEqual(
2556 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2557 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2558 )
2559 self.assertEqual(
2560 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2561 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2562 )
2563 self.assertEqual(
2564 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2565 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2566 )
2568 def testQueryResultSummaries(self):
2569 """Test summary methods like `count`, `any`, and `explain_no_results`
2570 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2571 """
2572 registry = self.makeRegistry()
2573 self.loadData(registry, "base.yaml")
2574 self.loadData(registry, "datasets.yaml")
2575 self.loadData(registry, "spatial.yaml")
2576 # Default test dataset has two collections, each with both flats and
2577 # biases. Add a new collection with only biases.
2578 registry.registerCollection("biases", CollectionType.TAGGED)
2579 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2580 # First query yields two results, and involves no postprocessing.
2581 query1 = registry.queryDataIds(["physical_filter"], band="r")
2582 self.assertTrue(query1.any(execute=False, exact=False))
2583 self.assertTrue(query1.any(execute=True, exact=False))
2584 self.assertTrue(query1.any(execute=True, exact=True))
2585 self.assertEqual(query1.count(exact=False), 2)
2586 self.assertEqual(query1.count(exact=True), 2)
2587 self.assertFalse(list(query1.explain_no_results()))
2588 # Second query should yield no results, which we should see when
2589 # we attempt to expand the data ID.
2590 query2 = registry.queryDataIds(["physical_filter"], band="h")
2591 # There's no execute=False, exact=False test here because the behavior
2592 # is not something we want to guarantee in this case (and exact=False
2593 # says either answer is legal).
2594 self.assertFalse(query2.any(execute=True, exact=False))
2595 self.assertFalse(query2.any(execute=True, exact=True))
2596 self.assertEqual(query2.count(exact=False), 0)
2597 self.assertEqual(query2.count(exact=True), 0)
2598 self.assertTrue(list(query2.explain_no_results()))
2599 # These queries yield no results due to various problems that can be
2600 # spotted prior to execution, yielding helpful diagnostics.
2601 base_query = registry.queryDataIds(["detector", "physical_filter"])
2602 queries_and_snippets = [
2603 (
2604 # Dataset type name doesn't match any existing dataset types.
2605 registry.queryDatasets("nonexistent", collections=...),
2606 ["nonexistent"],
2607 ),
2608 (
2609 # Dataset type object isn't registered.
2610 registry.queryDatasets(
2611 DatasetType(
2612 "nonexistent",
2613 dimensions=["instrument"],
2614 universe=registry.dimensions,
2615 storageClass="Image",
2616 ),
2617 collections=...,
2618 ),
2619 ["nonexistent"],
2620 ),
2621 (
2622 # No datasets of this type in this collection.
2623 registry.queryDatasets("flat", collections=["biases"]),
2624 ["flat", "biases"],
2625 ),
2626 (
2627 # No datasets of this type in this collection.
2628 base_query.findDatasets("flat", collections=["biases"]),
2629 ["flat", "biases"],
2630 ),
2631 (
2632 # No collections matching at all.
2633 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2634 ["potato"],
2635 ),
2636 ]
2637 # The behavior of these additional queries is slated to change in the
2638 # future, so we also check for deprecation warnings.
2639 with self.assertWarns(FutureWarning):
2640 queries_and_snippets.append(
2641 (
2642 # Dataset type name doesn't match any existing dataset
2643 # types.
2644 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2645 ["nonexistent"],
2646 )
2647 )
2648 with self.assertWarns(FutureWarning):
2649 queries_and_snippets.append(
2650 (
2651 # Dataset type name doesn't match any existing dataset
2652 # types.
2653 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2654 ["nonexistent"],
2655 )
2656 )
2657 for query, snippets in queries_and_snippets:
2658 self.assertFalse(query.any(execute=False, exact=False))
2659 self.assertFalse(query.any(execute=True, exact=False))
2660 self.assertFalse(query.any(execute=True, exact=True))
2661 self.assertEqual(query.count(exact=False), 0)
2662 self.assertEqual(query.count(exact=True), 0)
2663 messages = list(query.explain_no_results())
2664 self.assertTrue(messages)
2665 # Want all expected snippets to appear in at least one message.
2666 self.assertTrue(
2667 any(
2668 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2669 ),
2670 messages,
2671 )
2673 # This query does yield results, but should also emit a warning because
2674 # passing dataset type patterns to queryDataIds is deprecated; just
2675 # look for the warning.
2676 with self.assertWarns(FutureWarning):
2677 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2679 # These queries yield no results due to problems that can be identified
2680 # by cheap follow-up queries, yielding helpful diagnostics.
2681 for query, snippets in [
2682 (
2683 # No records for one of the involved dimensions.
2684 registry.queryDataIds(["subfilter"]),
2685 ["no rows", "subfilter"],
2686 ),
2687 (
2688 # No records for one of the involved dimensions.
2689 registry.queryDimensionRecords("subfilter"),
2690 ["no rows", "subfilter"],
2691 ),
2692 ]:
2693 self.assertFalse(query.any(execute=True, exact=False))
2694 self.assertFalse(query.any(execute=True, exact=True))
2695 self.assertEqual(query.count(exact=True), 0)
2696 messages = list(query.explain_no_results())
2697 self.assertTrue(messages)
2698 # Want all expected snippets to appear in at least one message.
2699 self.assertTrue(
2700 any(
2701 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2702 ),
2703 messages,
2704 )
2706 # This query yields four overlaps in the database, but one is filtered
2707 # out in postprocessing. The count queries aren't accurate because
2708 # they don't account for duplication that happens due to an internal
2709 # join against commonSkyPix.
2710 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2711 self.assertEqual(
2712 {
2713 DataCoordinate.standardize(
2714 instrument="Cam1",
2715 skymap="SkyMap1",
2716 visit=v,
2717 tract=t,
2718 universe=registry.dimensions,
2719 )
2720 for v, t in [(1, 0), (2, 0), (2, 1)]
2721 },
2722 set(query3),
2723 )
2724 self.assertTrue(query3.any(execute=False, exact=False))
2725 self.assertTrue(query3.any(execute=True, exact=False))
2726 self.assertTrue(query3.any(execute=True, exact=True))
2727 self.assertGreaterEqual(query3.count(exact=False), 4)
2728 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2729 self.assertFalse(list(query3.explain_no_results()))
2730 # This query yields overlaps in the database, but all are filtered
2731 # out in postprocessing. The count queries again aren't very useful.
2732 # We have to use `where=` here to avoid an optimization that
2733 # (currently) skips the spatial postprocess-filtering because it
2734 # recognizes that no spatial join is necessary. That's not ideal, but
2735 # fixing it is out of scope for this ticket.
2736 query4 = registry.queryDataIds(
2737 ["visit", "tract"],
2738 instrument="Cam1",
2739 skymap="SkyMap1",
2740 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2741 )
2742 self.assertFalse(set(query4))
2743 self.assertTrue(query4.any(execute=False, exact=False))
2744 self.assertTrue(query4.any(execute=True, exact=False))
2745 self.assertFalse(query4.any(execute=True, exact=True))
2746 self.assertGreaterEqual(query4.count(exact=False), 1)
2747 self.assertEqual(query4.count(exact=True, discard=True), 0)
2748 messages = list(query4.explain_no_results())
2749 self.assertTrue(messages)
2750 self.assertTrue(any("overlap" in message for message in messages))
2751 # This query should yield results from one dataset type but not the
2752 # other, which is not registered.
2753 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2754 self.assertTrue(set(query5))
2755 self.assertTrue(query5.any(execute=False, exact=False))
2756 self.assertTrue(query5.any(execute=True, exact=False))
2757 self.assertTrue(query5.any(execute=True, exact=True))
2758 self.assertGreaterEqual(query5.count(exact=False), 1)
2759 self.assertGreaterEqual(query5.count(exact=True), 1)
2760 self.assertFalse(list(query5.explain_no_results()))
2761 # This query applies a selection that yields no results, fully in the
2762 # database. Explaining why it fails involves traversing the relation
2763 # tree and running a LIMIT 1 query at each level that has the potential
2764 # to remove rows.
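# Conceptually the diagnostic runs a probe like the following at each
# level of the tree (an illustration of the idea, not the exact SQL
# emitted):
#     SELECT 1 FROM detector WHERE instrument = 'Cam1' LIMIT 1
#     SELECT 1 FROM detector WHERE instrument = 'Cam1'
#         AND purpose = 'no-purpose' LIMIT 1
# and reports the first level at which no row comes back.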
2765 query6 = registry.queryDimensionRecords(
2766 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2767 )
2768 self.assertEqual(query6.count(exact=True), 0)
2769 messages = list(query6.explain_no_results())
2770 self.assertTrue(messages)
2771 self.assertTrue(any("no-purpose" in message for message in messages))
2773 def testQueryDataIdsOrderBy(self):
2774 """Test order_by and limit on result returned by queryDataIds()."""
2775 registry = self.makeRegistry()
2776 self.loadData(registry, "base.yaml")
2777 self.loadData(registry, "datasets.yaml")
2778 self.loadData(registry, "spatial.yaml")
2780 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2781 return registry.queryDataIds(
2782 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2783 )
2785 Test = namedtuple(
2786 "testQueryDataIdsOrderByTest",
2787 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2788 defaults=(None, None, None),
2789 )
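# Notes on the table below: a leading "-" in order_by requests
# descending order, and limit is a (row_limit[, offset]) tuple; e.g.
# limit=(3, 3) skips the first three ordered rows and keeps the next
# three.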
2791 test_data = (
2792 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2793 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2794 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2795 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2796 Test(
2797 "tract.id,visit.id",
2798 "tract,visit",
2799 ((0, 1), (0, 1), (0, 2)),
2800 limit=(3,),
2801 ),
2802 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2803 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2804 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2805 Test(
2806 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2807 ),
2808 Test(
2809 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2810 ),
2811 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2812 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2813 Test(
2814 "tract,-timespan.begin,timespan.end",
2815 "tract,visit",
2816 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2817 ),
2818 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2819 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2820 Test(
2821 "tract,detector",
2822 "tract,detector",
2823 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2824 datasets="flat",
2825 collections="imported_r",
2826 ),
2827 Test(
2828 "tract,detector.full_name",
2829 "tract,detector",
2830 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2831 datasets="flat",
2832 collections="imported_r",
2833 ),
2834 Test(
2835 "tract,detector.raft,detector.name_in_raft",
2836 "tract,detector",
2837 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2838 datasets="flat",
2839 collections="imported_r",
2840 ),
2841 )
2843 for test in test_data:
2844 order_by = test.order_by.split(",")
2845 keys = test.keys.split(",")
2846 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2847 if test.limit is not None:
2848 query = query.limit(*test.limit)
2849 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2850 self.assertEqual(dataIds, test.result)
2852 # Materializing a query with ORDER BY or LIMIT is unsupported; expect an error.
2853 query = do_query(keys).order_by(*order_by)
2854 if test.limit is not None:
2855 query = query.limit(*test.limit)
2856 with self.assertRaises(RelationalAlgebraError):
2857 with query.materialize():
2858 pass
2860 # errors in a name
2861 for order_by in ("", "-"):
2862 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2863 list(do_query().order_by(order_by))
2865 for order_by in ("undimension.name", "-undimension.name"):
2866 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2867 list(do_query().order_by(order_by))
2869 for order_by in ("attract", "-attract"):
2870 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2871 list(do_query().order_by(order_by))
2873 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2874 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2876 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2877 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2879 with self.assertRaisesRegex(
2880 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2881 ):
2882 list(do_query("tract").order_by("timespan.begin"))
2884 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2885 list(do_query("tract").order_by("tract.timespan.begin"))
2887 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2888 list(do_query("tract").order_by("tract.name"))
2890 def testQueryDataIdsGovernorExceptions(self):
2891 """Test exceptions raised by queryDataIds() for incorrect governors."""
2892 registry = self.makeRegistry()
2893 self.loadData(registry, "base.yaml")
2894 self.loadData(registry, "datasets.yaml")
2895 self.loadData(registry, "spatial.yaml")
2897 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
2898 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2900 Test = namedtuple(
2901 "testQueryDataIdExceptionsTest",
2902 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2903 defaults=(None, None, None, {}, None, 0),
2904 )
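# instrument and skymap are governor dimensions: their values are
# validated eagerly, so an unknown value raises DataIdValueError rather
# than merely producing an empty result.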
2906 test_data = (
2907 Test("tract,visit", count=6),
2908 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2909 Test(
2910 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2911 ),
2912 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2913 Test(
2914 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2915 ),
2916 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2917 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2918 Test(
2919 "tract,visit",
2920 where="instrument=cam AND skymap=map",
2921 bind={"cam": "Cam1", "map": "SkyMap1"},
2922 count=6,
2923 ),
2924 Test(
2925 "tract,visit",
2926 where="instrument=cam AND skymap=map",
2927 bind={"cam": "Cam", "map": "SkyMap"},
2928 exception=DataIdValueError,
2929 ),
2930 )
2932 for test in test_data:
2933 dimensions = test.dimensions.split(",")
2934 if test.exception:
2935 with self.assertRaises(test.exception):
2936 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2937 else:
2938 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2939 self.assertEqual(query.count(discard=True), test.count)
2941 # Repeat the same checks on a materialized query.
2942 if test.exception:
2943 with self.assertRaises(test.exception):
2944 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2945 with query.materialize() as materialized:
2946 materialized.count(discard=True)
2947 else:
2948 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2949 with query.materialize() as materialized:
2950 self.assertEqual(materialized.count(discard=True), test.count)
2952 def testQueryDimensionRecordsOrderBy(self):
2953 """Test order_by and limit on result returned by
2954 queryDimensionRecords().
2955 """
2956 registry = self.makeRegistry()
2957 self.loadData(registry, "base.yaml")
2958 self.loadData(registry, "datasets.yaml")
2959 self.loadData(registry, "spatial.yaml")
2961 def do_query(element, datasets=None, collections=None):
2962 return registry.queryDimensionRecords(
2963 element, instrument="Cam1", datasets=datasets, collections=collections
2964 )
2966 query = do_query("detector")
2967 self.assertEqual(len(list(query)), 4)
2969 Test = namedtuple(
2970 "testQueryDataIdsOrderByTest",
2971 ("element", "order_by", "result", "limit", "datasets", "collections"),
2972 defaults=(None, None, None),
2973 )
2975 test_data = (
2976 Test("detector", "detector", (1, 2, 3, 4)),
2977 Test("detector", "-detector", (4, 3, 2, 1)),
2978 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2979 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2980 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2981 Test("visit", "visit", (1, 2)),
2982 Test("visit", "-visit.id", (2, 1)),
2983 Test("visit", "zenith_angle", (1, 2)),
2984 Test("visit", "-visit.name", (2, 1)),
2985 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2986 )
2988 for test in test_data:
2989 order_by = test.order_by.split(",")
2990 query = do_query(test.element).order_by(*order_by)
2991 if test.limit is not None:
2992 query = query.limit(*test.limit)
2993 dataIds = tuple(rec.id for rec in query)
2994 self.assertEqual(dataIds, test.result)
2996 # errors in a name
2997 for order_by in ("", "-"):
2998 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2999 list(do_query("detector").order_by(order_by))
3001 for order_by in ("undimension.name", "-undimension.name"):
3002 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3003 list(do_query("detector").order_by(order_by))
3005 for order_by in ("attract", "-attract"):
3006 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3007 list(do_query("detector").order_by(order_by))
3009 def testQueryDimensionRecordsExceptions(self):
3010 """Test exceptions raised by queryDimensionRecords()."""
3011 registry = self.makeRegistry()
3012 self.loadData(registry, "base.yaml")
3013 self.loadData(registry, "datasets.yaml")
3014 self.loadData(registry, "spatial.yaml")
3016 result = registry.queryDimensionRecords("detector")
3017 self.assertEqual(result.count(), 4)
3018 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3019 self.assertEqual(result.count(), 4)
3020 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3021 self.assertEqual(result.count(), 4)
3022 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3023 self.assertEqual(result.count(), 4)
3024 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3025 self.assertEqual(result.count(), 4)
3027 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3028 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3029 result.count()
3031 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3032 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3033 result.count()
3035 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3036 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3037 result.count()
3039 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3040 result = registry.queryDimensionRecords(
3041 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3042 )
3043 result.count()
3045 def testDatasetConstrainedDimensionRecordQueries(self):
3046 """Test that queryDimensionRecords works even when given a dataset
3047 constraint whose dimensions extend beyond the requested dimension
3048 element's.
3049 """
3050 registry = self.makeRegistry()
3051 self.loadData(registry, "base.yaml")
3052 self.loadData(registry, "datasets.yaml")
3053 # Query for physical_filter dimension records, using a dataset type
3054 # whose dimensions include detector as well as physical_filter.
3055 records = registry.queryDimensionRecords(
3056 "physical_filter",
3057 datasets=["flat"],
3058 collections="imported_r",
3059 )
3060 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3061 # Trying to constrain by all dataset types is an error.
3062 with self.assertRaises(TypeError):
3063 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3065 def testSkyPixDatasetQueries(self):
3066 """Test that we can build queries involving skypix dimensions as long
3067 as a dataset type that uses those dimensions is included.
3068 """
3069 registry = self.makeRegistry()
3070 self.loadData(registry, "base.yaml")
3071 dataset_type = DatasetType(
3072 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3073 )
3074 registry.registerDatasetType(dataset_type)
3075 run = "r"
3076 registry.registerRun(run)
3077 # First try queries where there are no datasets; the concern is whether
3078 # we can even build and execute these queries without raising, even
3079 # when "doomed" query shortcuts are in play.
3080 self.assertFalse(
3081 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3082 )
3083 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3084 # Now add a dataset and see that we can get it back.
3085 htm7 = registry.dimensions.skypix["htm"][7].pixelization
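# universe() returns the pixelization's full index coverage as a
# range-set of (begin, end) pairs; [0][0] below takes the begin of the
# first range, i.e. the lowest valid htm7 pixel index.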
3086 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3087 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3088 self.assertEqual(
3089 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3090 {data_id},
3091 )
3092 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3094 def testDatasetIdFactory(self):
3095 """Simple test for DatasetIdFactory, mostly to catch potential changes
3096 in its API.
3097 """
3098 registry = self.makeRegistry()
3099 factory = registry.datasetIdFactory
3100 dataset_type = DatasetType(
3101 "datasetType",
3102 dimensions=["detector", "instrument"],
3103 universe=registry.dimensions,
3104 storageClass="int",
3105 )
3106 run = "run"
3107 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
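# UNIQUE requests a random UUID4; the DATAID_TYPE* modes produce a
# deterministic, namespaced UUID5 which (as the enum names suggest) is
# derived from the dataset type and data ID, plus the run for
# DATAID_TYPE_RUN.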
3109 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3110 self.assertIsInstance(datasetId, uuid.UUID)
3111 self.assertEqual(datasetId.version, 4)
3113 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3114 self.assertIsInstance(datasetId, uuid.UUID)
3115 self.assertEqual(datasetId.version, 5)
3117 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3118 self.assertIsInstance(datasetId, uuid.UUID)
3119 self.assertEqual(datasetId.version, 5)
3121 def testExposureQueries(self):
3122 """Test query methods using arguments sourced from the exposure log
3123 service.
3125 The most complete test dataset currently available to daf_butler tests
3126 is the hsc-rc2-subset.yaml export (which is unfortunately distinct
3127 from the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3128 dimension records as it was focused on providing nontrivial spatial
3129 overlaps between visit+detector and tract+patch. So in this test we
3130 need to translate queries that originally used the exposure dimension
3131 to use the (very similar) visit dimension instead.
3132 """
3133 registry = self.makeRegistry()
3134 self.loadData(registry, "hsc-rc2-subset.yaml")
3135 self.assertEqual(
3136 [
3137 record.id
3138 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3139 .order_by("id")
3140 .limit(5)
3141 ],
3142 [318, 322, 326, 330, 332],
3143 )
3144 self.assertEqual(
3145 [
3146 data_id["visit"]
3147 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3148 ],
3149 [318, 322, 326, 330, 332],
3150 )
3151 self.assertEqual(
3152 [
3153 record.id
3154 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3155 .order_by("full_name")
3156 .limit(5)
3157 ],
3158 [73, 72, 71, 70, 65],
3159 )
3160 self.assertEqual(
3161 [
3162 data_id["detector"]
3163 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3164 .order_by("full_name")
3165 .limit(5)
3166 ],
3167 [73, 72, 71, 70, 65],
3168 )
3170 def test_long_query_names(self) -> None:
3171 """Test that queries involving very long names are handled correctly.
3173 This is especially important for PostgreSQL, which silently truncates
3174 identifiers longer than 63 characters, but it's worth testing for all DBs.
3175 """
3176 registry = self.makeRegistry()
3177 name = "abcd" * 17
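# 4 * 17 = 68 characters, past PostgreSQL's 63-character identifier
# limit once the name appears in generated query aliases.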
3178 registry.registerDatasetType(
3179 DatasetType(
3180 name,
3181 dimensions=(),
3182 storageClass="Exposure",
3183 universe=registry.dimensions,
3184 )
3185 )
3186 # Need to search more than one collection that actually contains a
3187 # matching dataset; otherwise an optimization makes findFirst=True a
3188 # no-op and would hide bugs caused by truncation.
3189 run1 = "run1"
3190 registry.registerRun(run1)
3191 run2 = "run2"
3192 registry.registerRun(run2)
3193 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1)
3194 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2)
3195 self.assertEqual(
3196 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3197 {ref1},
3198 )
3200 def test_skypix_constraint_queries(self) -> None:
3201 """Test queries spatially constrained by a skypix data ID."""
3202 registry = self.makeRegistry()
3203 self.loadData(registry, "hsc-rc2-subset.yaml")
3204 patch_regions = {
3205 (data_id["tract"], data_id["patch"]): data_id.region
3206 for data_id in registry.queryDataIds(["patch"]).expanded()
3207 }
3208 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3209 # This check ensures the test doesn't become trivial due to a config
3210 # change; if it does, just pick a different HTM level.
3211 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3212 # Gather all skypix IDs that definitely overlap at least one of these
3213 # patches.
3214 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3215 for patch_region in patch_regions.values():
3216 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
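# (interior() returns only pixels wholly contained in the region, so
# every index gathered here is guaranteed to overlap some patch.)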
3217 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3218 # and does not overlap at least one other patch.
3219 for skypix_id in itertools.chain.from_iterable(
3220 range(begin, end) for begin, end in relevant_skypix_ids
3221 ):
3222 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3223 overlapping_patches = {
3224 patch_key
3225 for patch_key, patch_region in patch_regions.items()
3226 if not patch_region.isDisjointFrom(skypix_region)
3227 }
3228 if overlapping_patches and overlapping_patches != patch_regions.keys():
3229 break
3230 else:
3231 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3232 self.assertEqual(
3233 {
3234 (data_id["tract"], data_id["patch"])
3235 for data_id in registry.queryDataIds(
3236 ["patch"],
3237 dataId={skypix_dimension.name: skypix_id},
3238 )
3239 },
3240 overlapping_patches,
3241 )
3243 def test_spatial_constraint_queries(self) -> None:
3244 """Test queries in which one spatial dimension in the constraint (data
3245 ID or ``where`` string) constrains a different spatial dimension in the
3246 query result columns.
3247 """
3248 registry = self.makeRegistry()
3249 self.loadData(registry, "hsc-rc2-subset.yaml")
3250 patch_regions = {
3251 (data_id["tract"], data_id["patch"]): data_id.region
3252 for data_id in registry.queryDataIds(["patch"]).expanded()
3253 }
3254 observation_regions = {
3255 (data_id["visit"], data_id["detector"]): data_id.region
3256 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3257 }
3258 all_combos = {
3259 (patch_key, observation_key)
3260 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3261 }
3262 overlapping_combos = {
3263 (patch_key, observation_key)
3264 for patch_key, observation_key in all_combos
3265 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3266 }
3267 # Check a direct spatial join with no constraint first.
3268 self.assertEqual(
3269 {
3270 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3271 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3272 },
3273 overlapping_combos,
3274 )
3275 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3276 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[int, int]]] = defaultdict(set)
3277 for patch_key, observation_key in overlapping_combos:
3278 overlaps_by_patch[patch_key].add(observation_key)
3279 overlaps_by_observation[observation_key].add(patch_key)
3280 # Find a patch and an observation that each overlap at least one of
3281 # the others but not all of them.
3282 nontrivial_patch = next(
3283 iter(
3284 patch_key
3285 for patch_key, observation_keys in overlaps_by_patch.items()
3286 if observation_keys and observation_keys != observation_regions.keys()
3287 )
3288 )
3289 nontrivial_observation = next(
3290 iter(
3291 observation_key
3292 for observation_key, patch_keys in overlaps_by_observation.items()
3293 if patch_keys and patch_keys != patch_regions.keys()
3294 )
3295 )
3296 # Use the nontrivial patches and observations as constraints on the
3297 # other dimensions in various ways, first via a 'where' expression.
3298 # It's better in general to use 'bind' instead of f-strings, but these
3299 # are all integers so there are no quoting concerns.
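# A bind-based equivalent would look like this sketch (my_tract and
# my_patch are arbitrary identifier names chosen for illustration):
#     registry.queryDataIds(
#         ["visit", "detector"],
#         where="tract = my_tract AND patch = my_patch",
#         bind={"my_tract": nontrivial_patch[0], "my_patch": nontrivial_patch[1]},
#         skymap="hsc_rings_v1",
#     )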
3300 self.assertEqual(
3301 {
3302 (data_id["visit"], data_id["detector"])
3303 for data_id in registry.queryDataIds(
3304 ["visit", "detector"],
3305 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3306 skymap="hsc_rings_v1",
3307 )
3308 },
3309 overlaps_by_patch[nontrivial_patch],
3310 )
3311 self.assertEqual(
3312 {
3313 (data_id["tract"], data_id["patch"])
3314 for data_id in registry.queryDataIds(
3315 ["patch"],
3316 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3317 instrument="HSC",
3318 )
3319 },
3320 overlaps_by_observation[nontrivial_observation],
3321 )
3322 # and then via the dataId argument.
3323 self.assertEqual(
3324 {
3325 (data_id["visit"], data_id["detector"])
3326 for data_id in registry.queryDataIds(
3327 ["visit", "detector"],
3328 dataId={
3329 "tract": nontrivial_patch[0],
3330 "patch": nontrivial_patch[1],
3331 },
3332 skymap="hsc_rings_v1",
3333 )
3334 },
3335 overlaps_by_patch[nontrivial_patch],
3336 )
3337 self.assertEqual(
3338 {
3339 (data_id["tract"], data_id["patch"])
3340 for data_id in registry.queryDataIds(
3341 ["patch"],
3342 dataId={
3343 "visit": nontrivial_observation[0],
3344 "detector": nontrivial_observation[1],
3345 },
3346 instrument="HSC",
3347 )
3348 },
3349 overlaps_by_observation[nontrivial_observation],
3350 )