# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested."""
        raise NotImplementedError()
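
    # A minimal sketch of a concrete subclass (hypothetical names; real
    # subclasses supply an actual database and test-data directory):
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.normpath("tests/data/registry")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # in-memory SQLite database
    #             # Assumes a Registry.createFromConfig() factory; the exact
    #             # factory used by real subclasses may differ.
    #             return Registry.createFromConfig(config)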

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
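            # datastore=None imports registry content only; no datastore
            # artifacts are transferred, which is all these tests need.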
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
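
    # Example use (sketch), comparing a lazy query against an explicit list:
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"]), expected_data_ids
    #     )
    # where expected_data_ids is a hypothetical list of DataCoordinate objects.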

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first with
        # duplicates, the second with matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Registering a second dataset type with a different name and storage
        # class should work independently of the first.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes:
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error.
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique ID generation modes; such datasets can be
        # re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
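                # A version-5 UUID is a deterministic, name-based (SHA-1)
                # UUID; here it is derived from the dataset type and data ID
                # (plus run, for DATAID_TYPE_RUN), which is what makes
                # re-import idempotent.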
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured, since assertLogs
            # fails if nothing is logged at all.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 in chain2 should find it via tag1,
        # recursing into chain1, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should be found via chain2 as
        # well, since run2 is part of that chain (directly and via chain1).
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But neither Cam2 (rolled back) nor Cam3 (never inserted) should
        # exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
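            # Data ID packers encode a data ID into a single integer and back
            # again; pack/unpack should round-trip, and the two packers should
            # produce different integers for the same data ID.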
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter; this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
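                # envelope() yields index ranges [begin, end) of skypix
                # pixels that may overlap the region (a conservative
                # superset), which is what the registry overlap join is
                # expected to reproduce.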
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )
1234 def testAttributeManager(self):
1235 """Test basic functionality of attribute manager."""
1236 # number of attributes with schema versions in a fresh database,
1237 # 6 managers with 3 records per manager, plus config for dimensions
1238 VERSION_COUNT = 6 * 3 + 1
1240 registry = self.makeRegistry()
1241 attributes = registry._managers.attributes
1243 # check what get() returns for non-existing key
1244 self.assertIsNone(attributes.get("attr"))
1245 self.assertEqual(attributes.get("attr", ""), "")
1246 self.assertEqual(attributes.get("attr", "Value"), "Value")
1247 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1249 # cannot store empty key or value
1250 with self.assertRaises(ValueError):
1251 attributes.set("", "value")
1252 with self.assertRaises(ValueError):
1253 attributes.set("attr", "")
1255 # set value of non-existing key
1256 attributes.set("attr", "value")
1257 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1258 self.assertEqual(attributes.get("attr"), "value")
1260 # Update the value of an existing key.
1261 with self.assertRaises(ButlerAttributeExistsError):
1262 attributes.set("attr", "value2")
1264 attributes.set("attr", "value2", force=True)
1265 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1266 self.assertEqual(attributes.get("attr"), "value2")
1268 # Delete an existing key.
1269 self.assertTrue(attributes.delete("attr"))
1270 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1272 # Delete a non-existing key.
1273 self.assertFalse(attributes.delete("non-attr"))
1275 # Store a bunch of keys and get the list back.
1276 data = [
1277 ("version.core", "1.2.3"),
1278 ("version.dimensions", "3.2.1"),
1279 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1280 ]
1281 for key, value in data:
1282 attributes.set(key, value)
1283 items = dict(attributes.items())
1284 for key, value in data:
1285 self.assertEqual(items[key], value)
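    def _exampleAttributeRoundTrip(self, attributes):
        # A minimal sketch of the manager contract exercised above (not
        # collected by unittest because the name does not start with "test";
        # the "example.key" key is purely illustrative).
        attributes.set("example.key", "1")
        try:
            attributes.set("example.key", "2")  # refuses to overwrite...
        except ButlerAttributeExistsError:
            attributes.set("example.key", "2", force=True)  # ...unless forced
        assert attributes.get("example.key") == "2"
        assert attributes.get("missing-key", "default") == "default"
        assert attributes.delete("example.key")  # True: the key existed
        assert not attributes.delete("example.key")  # False: already gone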
1287 def testQueryDatasetsDeduplication(self):
1288 """Test that the findFirst option to queryDatasets selects datasets
1289 from collections in the order given.
1290 """
1291 registry = self.makeRegistry()
1292 self.loadData(registry, "base.yaml")
1293 self.loadData(registry, "datasets.yaml")
1294 self.assertCountEqual(
1295 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1296 [
1297 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1298 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1299 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1300 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1301 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1302 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1303 ],
1304 )
1305 self.assertCountEqual(
1306 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1307 [
1308 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1309 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1310 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1311 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1312 ],
1313 )
1314 self.assertCountEqual(
1315 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1316 [
1317 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1318 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1319 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1320 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1321 ],
1322 )
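    # A hedged pure-Python model of the findFirst=True semantics checked
    # above (names are illustrative; the real implementation works in SQL):
    #
    #     def find_first(refs_by_collection, collections):
    #         result = {}
    #         for collection in collections:
    #             for data_id, ref in refs_by_collection[collection].items():
    #                 result.setdefault(data_id, ref)  # first collection wins
    #         return result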
1324 def testQueryResults(self):
1325 """Test querying for data IDs and then manipulating the QueryResults
1326 object returned to perform other queries.
1327 """
1328 registry = self.makeRegistry()
1329 self.loadData(registry, "base.yaml")
1330 self.loadData(registry, "datasets.yaml")
1331 bias = registry.getDatasetType("bias")
1332 flat = registry.getDatasetType("flat")
1333 # Obtain expected results from methods other than those we're testing
1334 # here. That includes:
1335 # - the dimensions of the data IDs we want to query:
1336 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1337 # - the dimensions of some other data IDs we'll extract from that:
1338 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1339 # - the data IDs we expect to obtain from the first queries:
1340 expectedDataIds = DataCoordinateSet(
1341 {
1342 DataCoordinate.standardize(
1343 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1344 )
1345 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1346 },
1347 graph=expectedGraph,
1348 hasFull=False,
1349 hasRecords=False,
1350 )
1351 # - the flat datasets we expect to find from those data IDs, in just
1352 # one collection (so deduplication is irrelevant):
1353 expectedFlats = [
1354 registry.findDataset(
1355 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1356 ),
1357 registry.findDataset(
1358 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1359 ),
1360 registry.findDataset(
1361 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1362 ),
1363 ]
1364 # - the data IDs we expect to extract from that:
1365 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1366 # - the bias datasets we expect to find from those data IDs, after we
1367 # subset out the physical_filter dimension, first with duplicates:
1368 expectedAllBiases = [
1369 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1370 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1371 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1372 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1373 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1374 ]
1375 # - ...and without duplicates:
1376 expectedDeduplicatedBiases = [
1377 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1378 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1379 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1380 ]
1381 # Test against those expected results, using a "lazy" query for the
1382 # data IDs (which re-executes that query each time we use it to do
1383 # something new).
1384 dataIds = registry.queryDataIds(
1385 ["detector", "physical_filter"],
1386 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1387 instrument="Cam1",
1388 )
1389 self.assertEqual(dataIds.graph, expectedGraph)
1390 self.assertEqual(dataIds.toSet(), expectedDataIds)
1391 self.assertCountEqual(
1392 list(
1393 dataIds.findDatasets(
1394 flat,
1395 collections=["imported_r"],
1396 )
1397 ),
1398 expectedFlats,
1399 )
1400 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1401 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1402 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1403 self.assertCountEqual(
1404 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1405 expectedAllBiases,
1406 )
1407 self.assertCountEqual(
1408 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1409 expectedDeduplicatedBiases,
1410 )
1411 # Materialize the bias dataset queries (only) by putting the results
1412 # into temporary tables, then repeat those tests.
1413 with subsetDataIds.findDatasets(
1414 bias, collections=["imported_r", "imported_g"], findFirst=False
1415 ).materialize() as biases:
1416 self.assertCountEqual(list(biases), expectedAllBiases)
1417 with subsetDataIds.findDatasets(
1418 bias, collections=["imported_r", "imported_g"], findFirst=True
1419 ).materialize() as biases:
1420 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1421 # Materialize the data ID subset query, but not the dataset queries.
1422 with subsetDataIds.materialize() as subsetDataIds:
1423 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1424 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1425 self.assertCountEqual(
1426 list(
1427 subsetDataIds.findDatasets(
1428 bias, collections=["imported_r", "imported_g"], findFirst=False
1429 )
1430 ),
1431 expectedAllBiases,
1432 )
1433 self.assertCountEqual(
1434 list(
1435 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1436 ),
1437 expectedDeduplicatedBiases,
1438 )
1439 # Materialize the dataset queries, too.
1440 with subsetDataIds.findDatasets(
1441 bias, collections=["imported_r", "imported_g"], findFirst=False
1442 ).materialize() as biases:
1443 self.assertCountEqual(list(biases), expectedAllBiases)
1444 with subsetDataIds.findDatasets(
1445 bias, collections=["imported_r", "imported_g"], findFirst=True
1446 ).materialize() as biases:
1447 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1448 # Materialize the original query, but none of the follow-up queries.
1449 with dataIds.materialize() as dataIds:
1450 self.assertEqual(dataIds.graph, expectedGraph)
1451 self.assertEqual(dataIds.toSet(), expectedDataIds)
1452 self.assertCountEqual(
1453 list(
1454 dataIds.findDatasets(
1455 flat,
1456 collections=["imported_r"],
1457 )
1458 ),
1459 expectedFlats,
1460 )
1461 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1462 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1463 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1464 self.assertCountEqual(
1465 list(
1466 subsetDataIds.findDatasets(
1467 bias, collections=["imported_r", "imported_g"], findFirst=False
1468 )
1469 ),
1470 expectedAllBiases,
1471 )
1472 self.assertCountEqual(
1473 list(
1474 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1475 ),
1476 expectedDeduplicatedBiases,
1477 )
1478 # Materialize just the bias dataset queries.
1479 with subsetDataIds.findDatasets(
1480 bias, collections=["imported_r", "imported_g"], findFirst=False
1481 ).materialize() as biases:
1482 self.assertCountEqual(list(biases), expectedAllBiases)
1483 with subsetDataIds.findDatasets(
1484 bias, collections=["imported_r", "imported_g"], findFirst=True
1485 ).materialize() as biases:
1486 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1487 # Materialize the subset data ID query, but not the dataset
1488 # queries.
1489 with subsetDataIds.materialize() as subsetDataIds:
1490 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1491 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1492 self.assertCountEqual(
1493 list(
1494 subsetDataIds.findDatasets(
1495 bias, collections=["imported_r", "imported_g"], findFirst=False
1496 )
1497 ),
1498 expectedAllBiases,
1499 )
1500 self.assertCountEqual(
1501 list(
1502 subsetDataIds.findDatasets(
1503 bias, collections=["imported_r", "imported_g"], findFirst=True
1504 )
1505 ),
1506 expectedDeduplicatedBiases,
1507 )
1508 # Materialize the bias dataset queries, too, so now we're
1509 # materializing every single step.
1510 with subsetDataIds.findDatasets(
1511 bias, collections=["imported_r", "imported_g"], findFirst=False
1512 ).materialize() as biases:
1513 self.assertCountEqual(list(biases), expectedAllBiases)
1514 with subsetDataIds.findDatasets(
1515 bias, collections=["imported_r", "imported_g"], findFirst=True
1516 ).materialize() as biases:
1517 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
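    # The materialize() pattern used throughout this test, as a standalone
    # hedged sketch: results are written to a temporary table that backs all
    # follow-up queries until the context manager exits.
    #
    #     query = registry.queryDataIds(["detector", "physical_filter"])
    #     with query.materialize() as data_ids:
    #         data_ids.toSet()          # reads the temporary table
    #         data_ids.findDatasets(    # joins against the temporary table
    #             "bias", collections=["imported_g"], findFirst=True
    #         )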
1519 def testEmptyDimensionsQueries(self):
1520 """Test Query and QueryResults objects in the case where there are no
1521 dimensions.
1522 """
1523 # Set up test data: one dataset type, two runs, one dataset in each.
1524 registry = self.makeRegistry()
1525 self.loadData(registry, "base.yaml")
1526 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1527 registry.registerDatasetType(schema)
1528 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1529 run1 = "run1"
1530 run2 = "run2"
1531 registry.registerRun(run1)
1532 registry.registerRun(run2)
1533 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1534 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1535 # Query directly for both of the datasets, and then for each one at a time.
1536 self.checkQueryResults(
1537 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1538 )
1539 self.checkQueryResults(
1540 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1541 [dataset1],
1542 )
1543 self.checkQueryResults(
1544 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1545 [dataset2],
1546 )
1547 # Query for data IDs with no dimensions.
1548 dataIds = registry.queryDataIds([])
1549 self.checkQueryResults(dataIds, [dataId])
1550 # Use queried data IDs to find the datasets.
1551 self.checkQueryResults(
1552 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1553 [dataset1, dataset2],
1554 )
1555 self.checkQueryResults(
1556 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1557 [dataset1],
1558 )
1559 self.checkQueryResults(
1560 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1561 [dataset2],
1562 )
1563 # Now materialize the data ID query results and repeat those tests.
1564 with dataIds.materialize() as dataIds:
1565 self.checkQueryResults(dataIds, [dataId])
1566 self.checkQueryResults(
1567 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1568 [dataset1],
1569 )
1570 self.checkQueryResults(
1571 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1572 [dataset2],
1573 )
1574 # Query for non-empty data IDs, then subset that to get the empty one.
1575 # Repeat the above tests starting from that.
1576 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1577 self.checkQueryResults(dataIds, [dataId])
1578 self.checkQueryResults(
1579 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1580 [dataset1, dataset2],
1581 )
1582 self.checkQueryResults(
1583 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1584 [dataset1],
1585 )
1586 self.checkQueryResults(
1587 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1588 [dataset2],
1589 )
1590 with dataIds.materialize() as dataIds:
1591 self.checkQueryResults(dataIds, [dataId])
1592 self.checkQueryResults(
1593 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1594 [dataset1, dataset2],
1595 )
1596 self.checkQueryResults(
1597 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1598 [dataset1],
1599 )
1600 self.checkQueryResults(
1601 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1602 [dataset2],
1603 )
1604 # Query for non-empty data IDs, then materialize, then subset to get
1605 # the empty one. Repeat again.
1606 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1607 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1608 self.checkQueryResults(dataIds, [dataId])
1609 self.checkQueryResults(
1610 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1611 [dataset1, dataset2],
1612 )
1613 self.checkQueryResults(
1614 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1615 [dataset1],
1616 )
1617 self.checkQueryResults(
1618 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1619 [dataset2],
1620 )
1621 with dataIds.materialize() as dataIds:
1622 self.checkQueryResults(dataIds, [dataId])
1623 self.checkQueryResults(
1624 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1625 [dataset1, dataset2],
1626 )
1627 self.checkQueryResults(
1628 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1629 [dataset1],
1630 )
1631 self.checkQueryResults(
1632 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1633 [dataset2],
1634 )
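    # A hedged note on the empty data ID used above: it is the unique
    # DataCoordinate with no dimensions at all, which is why every subset()
    # down to registry.dimensions.empty produces exactly one row.
    #
    #     empty = DataCoordinate.makeEmpty(registry.dimensions)
    #     empty.graph == registry.dimensions.empty  # True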
1636 def testDimensionDataModifications(self):
1637 """Test that modifying dimension records via:
1638 syncDimensionData(..., update=True) and
1639 insertDimensionData(..., replace=True) works as expected, even in the
1640 presence of datasets using those dimensions and spatial overlap
1641 relationships.
1642 """
1644 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1645 """Unpack a sphgeom.RangeSet into the integers it contains."""
1646 for begin, end in ranges:
1647 yield from range(begin, end)
1649 def range_set_hull(
1650 ranges: lsst.sphgeom.RangeSet,
1651 pixelization: lsst.sphgeom.HtmPixelization,
1652 ) -> lsst.sphgeom.ConvexPolygon:
1653 """Create a ConvexPolygon hull of the region defined by a set of
1654 HTM pixelization index ranges.
1655 """
1656 points = []
1657 for index in unpack_range_set(ranges):
1658 points.extend(pixelization.triangle(index).getVertices())
1659 return lsst.sphgeom.ConvexPolygon(points)
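        # For example (hedged; the values follow from HTM's 4-to-1 nesting):
        # lsst.sphgeom.RangeSet(12288).scaled(4) is the half-open index range
        # [49152, 49156), so unpack_range_set() on it yields 49152, 49153,
        # 49154, and 49155 -- the four children of trixel 12288 at the next
        # HTM level.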
1661 # Use HTM to set up an initial parent region (one arbitrary trixel)
1662 # and four child regions (the trixels within the parent at the next
1663 # level). We'll use the parent as a tract/visit region and the children
1664 # as its patch/visit_detector regions.
1665 registry = self.makeRegistry()
1666 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1667 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1668 index = 12288
1669 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1670 assert htm6.universe().contains(child_ranges_small)
1671 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1672 parent_region_small = lsst.sphgeom.ConvexPolygon(
1673 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1674 )
1675 assert all(parent_region_small.contains(c) for c in child_regions_small)
1676 # Make a larger version of each child region: the convex hull of the
1677 # htm6 trixels that overlap the original's bounding circle. Make a new
1678 # parent that's the convex hull of the new children.
1679 child_regions_large = [
1680 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1681 ]
1682 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1683 parent_region_large = lsst.sphgeom.ConvexPolygon(
1684 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1685 )
1686 assert all(parent_region_large.contains(c) for c in child_regions_large)
1687 assert parent_region_large.contains(parent_region_small)
1688 assert not parent_region_small.contains(parent_region_large)
1689 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1690 # Find some commonSkyPix indices that overlap the large regions but do
1691 # not overlap the small regions. We use commonSkyPix here to make sure the
1692 # real tests later involve what's in the database, not just post-query
1693 # region filtering.
1694 child_difference_indices = []
1695 for large, small in zip(child_regions_large, child_regions_small):
1696 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1697 assert difference, "if this is empty, we can't test anything useful with these regions"
1698 assert all(
1699 not commonSkyPix.triangle(d).isDisjointFrom(large)
1700 and commonSkyPix.triangle(d).isDisjointFrom(small)
1701 for d in difference
1702 )
1703 child_difference_indices.append(difference)
1704 parent_difference_indices = list(
1705 unpack_range_set(
1706 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1707 )
1708 )
1709 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1710 assert all(
1711 (
1712 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1713 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1714 )
1715 for d in parent_difference_indices
1716 )
1717 # Now that we've finally got those regions, we'll insert the large ones
1718 # as tract/patch dimension records.
1719 skymap_name = "testing_v1"
1720 registry.insertDimensionData(
1721 "skymap",
1722 {
1723 "name": skymap_name,
1724 "hash": bytes([42]),
1725 "tract_max": 1,
1726 "patch_nx_max": 2,
1727 "patch_ny_max": 2,
1728 },
1729 )
1730 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1731 registry.insertDimensionData(
1732 "patch",
1733 *[
1734 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1735 for n, c in enumerate(child_regions_large)
1736 ],
1737 )
1738 # Add a dataset that uses these dimensions to make sure that modifying
1739 # them doesn't disrupt foreign keys (i.e. the DB must not implement
1740 # insert with replace=True as delete-then-insert).
1741 dataset_type = DatasetType(
1742 "coadd",
1743 dimensions=["tract", "patch"],
1744 universe=registry.dimensions,
1745 storageClass="Exposure",
1746 )
1747 registry.registerDatasetType(dataset_type)
1748 registry.registerCollection("the_run", CollectionType.RUN)
1749 registry.insertDatasets(
1750 dataset_type,
1751 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1752 run="the_run",
1753 )
1754 # Query for tracts and patches that overlap some "difference"
1755 # commonSkyPix pixels; there should be overlaps, because the database has
1756 # the "large" suite of regions.
1757 self.assertEqual(
1758 {0},
1759 {
1760 data_id["tract"]
1761 for data_id in registry.queryDataIds(
1762 ["tract"],
1763 skymap=skymap_name,
1764 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1765 )
1766 },
1767 )
1768 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1769 self.assertIn(
1770 patch_id,
1771 {
1772 data_id["patch"]
1773 for data_id in registry.queryDataIds(
1774 ["patch"],
1775 skymap=skymap_name,
1776 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1777 )
1778 },
1779 )
1780 # Use sync to update the tract region and insert to update the patch
1781 # regions, switching to the "small" suite.
1782 updated = registry.syncDimensionData(
1783 "tract",
1784 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1785 update=True,
1786 )
1787 self.assertEqual(updated, {"region": parent_region_large})
1788 registry.insertDimensionData(
1789 "patch",
1790 *[
1791 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1792 for n, c in enumerate(child_regions_small)
1793 ],
1794 replace=True,
1795 )
1796 # Query again; there should now be no such overlaps, because the
1797 # database has the "small" suite of regions.
1798 self.assertFalse(
1799 set(
1800 registry.queryDataIds(
1801 ["tract"],
1802 skymap=skymap_name,
1803 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1804 )
1805 )
1806 )
1807 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1808 self.assertNotIn(
1809 patch_id,
1810 {
1811 data_id["patch"]
1812 for data_id in registry.queryDataIds(
1813 ["patch"],
1814 skymap=skymap_name,
1815 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1816 )
1817 },
1818 )
1819 # Update back to the large regions and query one more time.
1820 updated = registry.syncDimensionData(
1821 "tract",
1822 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1823 update=True,
1824 )
1825 self.assertEqual(updated, {"region": parent_region_small})
1826 registry.insertDimensionData(
1827 "patch",
1828 *[
1829 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1830 for n, c in enumerate(child_regions_large)
1831 ],
1832 replace=True,
1833 )
1834 self.assertEqual(
1835 {0},
1836 {
1837 data_id["tract"]
1838 for data_id in registry.queryDataIds(
1839 ["tract"],
1840 skymap=skymap_name,
1841 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1842 )
1843 },
1844 )
1845 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1846 self.assertIn(
1847 patch_id,
1848 {
1849 data_id["patch"]
1850 for data_id in registry.queryDataIds(
1851 ["patch"],
1852 skymap=skymap_name,
1853 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1854 )
1855 },
1856 )
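    # A hedged sketch of the update contract relied on above: with
    # update=True, syncDimensionData() returns a mapping from each modified
    # field to its *previous* value (and insertDimensionData(...,
    # replace=True) updates records in place, preserving foreign keys).
    # ``new_record`` below is an illustrative placeholder.
    #
    #     updated = registry.syncDimensionData("tract", new_record, update=True)
    #     if updated:
    #         old_region = updated["region"]  # the value before the update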
1858 def testCalibrationCollections(self):
1859 """Test operations on `~CollectionType.CALIBRATION` collections,
1860 including `Registry.certify`, `Registry.decertify`, and
1861 `Registry.findDataset`.
1862 """
1863 # Setup - make a Registry, fill it with some datasets in
1864 # non-calibration collections.
1865 registry = self.makeRegistry()
1866 self.loadData(registry, "base.yaml")
1867 self.loadData(registry, "datasets.yaml")
1868 # Set up some timestamps.
1869 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
1870 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
1871 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
1872 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
1873 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
1874 allTimespans = [
1875 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1876 ]
1877 # Get references to some datasets.
1878 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1879 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1880 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1881 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1882 # Register the main calibration collection we'll be working with.
1883 collection = "Cam1/calibs/default"
1884 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1885 # Cannot associate into a calibration collection (no timespan).
1886 with self.assertRaises(CollectionTypeError):
1887 registry.associate(collection, [bias2a])
1888 # Certify 2a dataset with [t2, t4) validity.
1889 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1890 # We should not be able to certify 2b with anything overlapping that
1891 # window.
1892 with self.assertRaises(ConflictingDefinitionError):
1893 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1894 with self.assertRaises(ConflictingDefinitionError):
1895 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1896 with self.assertRaises(ConflictingDefinitionError):
1897 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1898 with self.assertRaises(ConflictingDefinitionError):
1899 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1900 with self.assertRaises(ConflictingDefinitionError):
1901 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1902 with self.assertRaises(ConflictingDefinitionError):
1903 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1904 with self.assertRaises(ConflictingDefinitionError):
1905 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1906 with self.assertRaises(ConflictingDefinitionError):
1907 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1908 # We should be able to certify 3a with a range overlapping that window,
1909 # because it's for a different detector.
1910 # We'll certify 3a over [t1, t3).
1911 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1912 # Now we'll certify 2b and 3b together over [t4, ∞).
1913 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1915 # Fetch all associations and check that they are what we expect.
1916 self.assertCountEqual(
1917 list(
1918 registry.queryDatasetAssociations(
1919 "bias",
1920 collections=[collection, "imported_g", "imported_r"],
1921 )
1922 ),
1923 [
1924 DatasetAssociation(
1925 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1926 collection="imported_g",
1927 timespan=None,
1928 ),
1929 DatasetAssociation(
1930 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1931 collection="imported_r",
1932 timespan=None,
1933 ),
1934 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1935 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1936 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1937 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1938 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1939 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1940 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1941 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1942 ],
1943 )
1945 class Ambiguous:
1946 """Tag class to denote lookups that should be ambiguous."""
1948 pass
1950 def assertLookup(
1951 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
1952 ) -> None:
1953 """Local function that asserts that a bias lookup returns the given
1954 expected result.
1955 """
1956 if expected is Ambiguous:
1957 with self.assertRaises(RuntimeError):
1958 registry.findDataset(
1959 "bias",
1960 collections=collection,
1961 instrument="Cam1",
1962 detector=detector,
1963 timespan=timespan,
1964 )
1965 else:
1966 self.assertEqual(
1967 expected,
1968 registry.findDataset(
1969 "bias",
1970 collections=collection,
1971 instrument="Cam1",
1972 detector=detector,
1973 timespan=timespan,
1974 ),
1975 )
1977 # Systematically test lookups against expected results.
1978 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1979 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1980 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1981 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1982 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1983 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1984 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1985 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1986 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1987 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1988 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1989 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1990 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1991 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1992 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1993 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1994 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1995 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1996 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1997 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1998 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1999 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2000 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2001 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2002 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2003 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2004 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2005 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2006 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2007 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2008 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2009 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2010 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2011 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2012 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2013 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2014 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2015 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2016 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2017 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2018 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2019 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2021 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2022 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2023 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2024 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2025 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2026 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2027 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2028 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2029 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2030 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2031 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2032 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2033 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2034 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2035 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2036 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2037 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2038 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2039 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2040 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2041 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2042 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2043 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2044 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2045 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2046 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2047 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2048 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2049 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2050 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2051 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2052 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2053 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2054 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2055 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2056 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2057 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2058 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2059 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2060 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2061 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2062 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2063 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2064 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2065 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2066 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2068 # Decertify everything, this time with explicit data IDs, then check
2069 # that no lookups succeed.
2070 registry.decertify(
2071 collection,
2072 "bias",
2073 Timespan(None, None),
2074 dataIds=[
2075 dict(instrument="Cam1", detector=2),
2076 dict(instrument="Cam1", detector=3),
2077 ],
2078 )
2079 for detector in (2, 3):
2080 for timespan in allTimespans:
2081 assertLookup(detector=detector, timespan=timespan, expected=None)
2082 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2083 # those.
2084 registry.certify(
2085 collection,
2086 [bias2a, bias3a],
2087 Timespan(None, None),
2088 )
2089 for timespan in allTimespans:
2090 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2091 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2092 # Decertify just bias2 over [t2, t4).
2093 # This should split a single certification row into two (and leave the
2094 # other existing row, for bias3a, alone).
2095 registry.decertify(
2096 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2097 )
2098 for timespan in allTimespans:
2099 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2100 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2101 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2102 if overlapsBefore and overlapsAfter:
2103 expected = Ambiguous
2104 elif overlapsBefore or overlapsAfter:
2105 expected = bias2a
2106 else:
2107 expected = None
2108 assertLookup(detector=2, timespan=timespan, expected=expected)
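    # The boundary behavior all of those lookups depend on, as a hedged
    # standalone sketch: Timespan is half-open, [begin, end).
    #
    #     ts = Timespan(t2, t4)
    #     ts.overlaps(Timespan(t4, None))   # False: end bound is exclusive
    #     ts.overlaps(Timespan(t3, t5))     # True: [t3, t4) is shared
    #     ts.overlaps(Timespan(None, t2))   # False: ts begins exactly at t2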
2110 def testSkipCalibs(self):
2111 """Test how queries handle skipping of calibration collections."""
2112 registry = self.makeRegistry()
2113 self.loadData(registry, "base.yaml")
2114 self.loadData(registry, "datasets.yaml")
2116 coll_calib = "Cam1/calibs/default"
2117 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2119 # Add all biases to the calibration collection.
2120 # Without this, the logic that prunes dataset subqueries based on
2121 # datasetType-collection summary information will fire before the logic
2122 # we want to test below. This is a good thing (it avoids the dreaded
2123 # NotImplementedError a bit more often) everywhere but here.
2124 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2126 coll_list = [coll_calib, "imported_g", "imported_r"]
2127 chain = "Cam1/chain"
2128 registry.registerCollection(chain, type=CollectionType.CHAINED)
2129 registry.setCollectionChain(chain, coll_list)
2131 # An explicit collection list will raise if findFirst=True or there are
2132 # temporal dimensions involved.
2133 with self.assertRaises(NotImplementedError):
2134 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2135 with self.assertRaises(NotImplementedError):
2136 registry.queryDataIds(
2137 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2138 ).count()
2140 # A chained collection will skip the calibration collection instead.
2141 datasets = list(registry.queryDatasets("bias", collections=chain))
2142 self.assertGreater(len(datasets), 0)
2144 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2145 self.assertGreater(len(dataIds), 0)
2147 # A glob pattern will skip it too.
2148 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2149 self.assertGreater(len(datasets), 0)
2151 # A regular expression will skip it too.
2152 pattern = re.compile(".*")
2153 datasets = list(registry.queryDatasets("bias", collections=pattern))
2154 self.assertGreater(len(datasets), 0)
2156 # Ellipsis should work as usual.
2157 datasets = list(registry.queryDatasets("bias", collections=...))
2158 self.assertGreater(len(datasets), 0)
2160 # A few tests with findFirst.
2161 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2162 self.assertGreater(len(datasets), 0)
2164 def testIngestTimeQuery(self):
"""Test that queries can be constrained by dataset ingest_date."""
2166 registry = self.makeRegistry()
2167 self.loadData(registry, "base.yaml")
2168 dt0 = datetime.utcnow()
2169 self.loadData(registry, "datasets.yaml")
2170 dt1 = datetime.utcnow()
2172 datasets = list(registry.queryDatasets(..., collections=...))
2173 len0 = len(datasets)
2174 self.assertGreater(len0, 0)
2176 where = "ingest_date > T'2000-01-01'"
2177 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2178 len1 = len(datasets)
2179 self.assertEqual(len0, len1)
2181 # no one will ever use this piece of software in 30 years
2182 where = "ingest_date > T'2050-01-01'"
2183 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2184 len2 = len(datasets)
2185 self.assertEqual(len2, 0)
2187 # Check more exact timing to make sure there is no 37-second offset
2188 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2189 # sure that we don't test with higher precision.
2190 tests = [
2191 # format: (timestamp, operator, expected_len)
2192 (dt0 - timedelta(seconds=1), ">", len0),
2193 (dt0 - timedelta(seconds=1), "<", 0),
2194 (dt1 + timedelta(seconds=1), "<", len0),
2195 (dt1 + timedelta(seconds=1), ">", 0),
2196 ]
2197 for dt, op, expect_len in tests:
2198 dt_str = dt.isoformat(sep=" ")
2200 where = f"ingest_date {op} T'{dt_str}'"
2201 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2202 self.assertEqual(len(datasets), expect_len)
2204 # Same tests with bind, using datetime or astropy Time values.
2205 where = f"ingest_date {op} ingest_time"
2206 datasets = list(
2207 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2208 )
2209 self.assertEqual(len(datasets), expect_len)
2211 dt_astropy = astropy.time.Time(dt, format="datetime")
2212 datasets = list(
2213 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2214 )
2215 self.assertEqual(len(datasets), expect_len)
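    # Hedged sketch of the two ways a time constant can enter a query
    # expression, as exercised above (``my_cutoff`` is an illustrative
    # bind key, not part of the API):
    #
    #     registry.queryDatasets(
    #         ..., collections=..., where="ingest_date > T'2020-01-01'"
    #     )
    #     registry.queryDatasets(
    #         ..., collections=...,
    #         where="ingest_date > my_cutoff",
    #         bind={"my_cutoff": astropy.time.Time("2020-01-01", scale="tai")},
    #     )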
2217 def testTimespanQueries(self):
2218 """Test query expressions involving timespans."""
2219 registry = self.makeRegistry()
2220 self.loadData(registry, "hsc-rc2-subset.yaml")
2221 # All visits in the database; mapping from ID to timespan.
2222 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2223 # Just those IDs, sorted (which is also temporal sorting, because HSC
2224 # visit IDs are monotonically increasing).
2225 ids = sorted(visits.keys())
2226 self.assertGreater(len(ids), 20)
2227 # Pick some quasi-random indexes into `ids` to play with.
2228 i1 = int(len(ids) * 0.1)
2229 i2 = int(len(ids) * 0.3)
2230 i3 = int(len(ids) * 0.6)
2231 i4 = int(len(ids) * 0.8)
2232 # Extract some times from those: just before the beginning of i1 (which
2233 # should be after the end of the previous visit), exactly the
2234 # beginning of i2, just after the beginning of i3 (and before its end),
2235 # and the exact end of i4.
2236 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2237 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2238 t2 = visits[ids[i2]].begin
2239 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2240 self.assertLess(t3, visits[ids[i3]].end)
2241 t4 = visits[ids[i4]].end
2242 # Make sure those are actually in order.
2243 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2245 bind = {
2246 "t1": t1,
2247 "t2": t2,
2248 "t3": t3,
2249 "t4": t4,
2250 "ts23": Timespan(t2, t3),
2251 }
2253 def query(where):
2254 """Helper function that queries for visit data IDs and returns
2255 results as a sorted, deduplicated list of visit IDs.
2256 """
2257 return sorted(
2258 {
2259 dataId["visit"]
2260 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2261 }
2262 )
2264 # Try a bunch of timespan queries, mixing up the bounds themselves,
2265 # where they appear in the expression, and how we get the timespan into
2266 # the expression.
2268 # t1 is before the start of i1, so this should not include i1.
2269 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2270 # t2 is exactly at the start of i2, but ends are exclusive, so these
2271 # should not include i2.
2272 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2273 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2274 # t3 is in the middle of i3, so this should include i3.
2275 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2276 # This one should not include i3, by the same reasoning.
2277 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2278 # t4 is exactly at the end of i4, so this should include i4.
2279 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2280 # i4's upper bound of t4 is exclusive, so this should not include i4.
2281 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2283 # Now some timespan vs. time scalar queries.
2284 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2285 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2286 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2287 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2288 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2289 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2291 # Empty timespans should not overlap anything.
2292 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2294 def testCollectionSummaries(self):
2295 """Test recording and retrieval of collection summaries."""
2296 self.maxDiff = None
2297 registry = self.makeRegistry()
2298 # Importing datasets from yaml should go through the code path where
2299 # we update collection summaries as we insert datasets.
2300 self.loadData(registry, "base.yaml")
2301 self.loadData(registry, "datasets.yaml")
2302 flat = registry.getDatasetType("flat")
2303 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
2304 expected1.datasetTypes.add(registry.getDatasetType("bias"))
2305 expected1.datasetTypes.add(flat)
2306 expected1.dimensions.update_extract(
2307 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
2308 )
2309 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2310 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2311 # Create a chained collection with both of the imported runs; the
2312 # summary should be the same, because it's a union with itself.
2313 chain = "chain"
2314 registry.registerCollection(chain, CollectionType.CHAINED)
2315 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2316 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2317 # Associate flats only into a tagged collection and a calibration
2318 # collection to check summaries of those.
2319 tag = "tag"
2320 registry.registerCollection(tag, CollectionType.TAGGED)
2321 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2322 calibs = "calibs"
2323 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2324 registry.certify(
2325 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2326 )
2327 expected2 = expected1.copy()
2328 expected2.datasetTypes.discard("bias")
2329 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2330 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2331 # Explicitly calling Registry.refresh() should load those same
2332 # summaries, via a totally different code path.
2333 registry.refresh()
2334 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2335 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2336 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2337 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
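    # A hedged sketch of what a summary records and why it matters: the
    # dataset types and governor dimension values present in a collection,
    # which lets queries skip collections that cannot possibly match.
    #
    #     summary = registry.getCollectionSummary("imported_g")
    #     "bias" in summary.datasetTypes.names  # True for this test data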
2339 def testBindInQueryDatasets(self):
2340 """Test that the bind parameter is correctly forwarded in
2341 queryDatasets recursion.
2342 """
2343 registry = self.makeRegistry()
2344 # Load some datasets to query against.
2346 self.loadData(registry, "base.yaml")
2347 self.loadData(registry, "datasets.yaml")
2348 self.assertEqual(
2349 set(registry.queryDatasets("flat", band="r", collections=...)),
2350 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2351 )
2353 def testQueryResultSummaries(self):
2354 """Test summary methods like `count`, `any`, and `explain_no_results`
2355 on `DataCoordinateQueryResults` and `DatasetQueryResults`
2356 """
2357 registry = self.makeRegistry()
2358 self.loadData(registry, "base.yaml")
2359 self.loadData(registry, "datasets.yaml")
2360 self.loadData(registry, "spatial.yaml")
2361 # Default test dataset has two collections, each with both flats and
2362 # biases. Add a new collection with only biases.
2363 registry.registerCollection("biases", CollectionType.TAGGED)
2364 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2365 # First query yields two results, and involves no postprocessing.
2366 query1 = registry.queryDataIds(["physical_filter"], band="r")
2367 self.assertTrue(query1.any(execute=False, exact=False))
2368 self.assertTrue(query1.any(execute=True, exact=False))
2369 self.assertTrue(query1.any(execute=True, exact=True))
2370 self.assertEqual(query1.count(exact=False), 2)
2371 self.assertEqual(query1.count(exact=True), 2)
2372 self.assertFalse(list(query1.explain_no_results()))
2373 # Second query should yield no results, but this isn't detectable
2374 # unless we actually run a query.
2375 query2 = registry.queryDataIds(["physical_filter"], band="h")
2376 self.assertTrue(query2.any(execute=False, exact=False))
2377 self.assertFalse(query2.any(execute=True, exact=False))
2378 self.assertFalse(query2.any(execute=True, exact=True))
2379 self.assertEqual(query2.count(exact=False), 0)
2380 self.assertEqual(query2.count(exact=True), 0)
2381 self.assertFalse(list(query2.explain_no_results()))
2382 # These queries yield no results due to various problems that can be
2383 # spotted prior to execution, yielding helpful diagnostics.
2384 for query, snippets in [
2385 (
2386 # Dataset type name doesn't match any existing dataset types.
2387 registry.queryDatasets("nonexistent", collections=...),
2388 ["nonexistent"],
2389 ),
2390 (
2391 # Dataset type name doesn't match any existing dataset types.
2392 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2393 ["nonexistent"],
2394 ),
2395 (
2396 # Dataset type object isn't registered.
2397 registry.queryDatasets(
2398 DatasetType(
2399 "nonexistent",
2400 dimensions=["instrument"],
2401 universe=registry.dimensions,
2402 storageClass="Image",
2403 ),
2404 collections=...,
2405 ),
2406 ["nonexistent"],
2407 ),
2408 (
2409 # No datasets of this type in this collection.
2410 registry.queryDatasets("flat", collections=["biases"]),
2411 ["flat", "biases"],
2412 ),
2413 (
2414 # No collections matching at all.
2415 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2416 ["potato"],
2417 ),
2418 (
2419 # Dataset type name doesn't match any existing dataset types.
2420 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2421 ["nonexistent"],
2422 ),
2423 ]:
2425 self.assertFalse(query.any(execute=False, exact=False))
2426 self.assertFalse(query.any(execute=True, exact=False))
2427 self.assertFalse(query.any(execute=True, exact=True))
2428 self.assertEqual(query.count(exact=False), 0)
2429 self.assertEqual(query.count(exact=True), 0)
2430 messages = list(query.explain_no_results())
2431 self.assertTrue(messages)
2432 # Want all expected snippets to appear in at least one message.
2433 self.assertTrue(
2434 any(
2435 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2436 ),
2437 messages,
2438 )
2440 # These queries yield no results due to problems that can be identified
2441 # by cheap follow-up queries, yielding helpful diagnostics.
2442 for query, snippets in [
2443 (
2444 # No records for one of the involved dimensions.
2445 registry.queryDataIds(["subfilter"]),
2446 ["dimension records", "subfilter"],
2447 ),
2448 (
2449 # No records for one of the involved dimensions.
2450 registry.queryDimensionRecords("subfilter"),
2451 ["dimension records", "subfilter"],
2452 ),
2453 ]:
2454 self.assertFalse(query.any(execute=True, exact=False))
2455 self.assertFalse(query.any(execute=True, exact=True))
2456 self.assertEqual(query.count(exact=True), 0)
2457 messages = list(query.explain_no_results())
2458 self.assertTrue(messages)
2459 # Want all expected snippets to appear in at least one message.
2460 self.assertTrue(
2461 any(
2462 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2463 ),
2464 messages,
2465 )
2467 # This query yields four overlaps in the database, but one is filtered
2468 # out in postprocessing. The count queries aren't accurate because
2469 # they don't account for duplication that happens due to an internal
2470 # join against commonSkyPix.
2471 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2472 self.assertEqual(
2473 {
2474 DataCoordinate.standardize(
2475 instrument="Cam1",
2476 skymap="SkyMap1",
2477 visit=v,
2478 tract=t,
2479 universe=registry.dimensions,
2480 )
2481 for v, t in [(1, 0), (2, 0), (2, 1)]
2482 },
2483 set(query3),
2484 )
2485 self.assertTrue(query3.any(execute=False, exact=False))
2486 self.assertTrue(query3.any(execute=True, exact=False))
2487 self.assertTrue(query3.any(execute=True, exact=True))
2488 self.assertGreaterEqual(query3.count(exact=False), 4)
2489 self.assertGreaterEqual(query3.count(exact=True), 3)
2490 self.assertFalse(list(query3.explain_no_results()))
2491 # This query yields overlaps in the database, but all are filtered
2492 # out in postprocessing. The count queries again aren't very useful.
2493 # We have to use `where=` here to avoid an optimization that
2494 # (currently) skips the spatial postprocess-filtering because it
2495 # recognizes that no spatial join is necessary. That's not ideal, but
2496 # fixing it is out of scope for this ticket.
2497 query4 = registry.queryDataIds(
2498 ["visit", "tract"],
2499 instrument="Cam1",
2500 skymap="SkyMap1",
2501 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2502 )
2503 self.assertFalse(set(query4))
2504 self.assertTrue(query4.any(execute=False, exact=False))
2505 self.assertTrue(query4.any(execute=True, exact=False))
2506 self.assertFalse(query4.any(execute=True, exact=True))
2507 self.assertGreaterEqual(query4.count(exact=False), 1)
2508 self.assertEqual(query4.count(exact=True), 0)
2509 messages = list(query4.explain_no_results())
2510 self.assertTrue(messages)
2511 self.assertTrue(any("regions did not overlap" in message for message in messages))
2513 # And there are cases where queries return empty results but we do not
2514 # know how to explain that yet (could we just say miracles happen?)
2515 query5 = registry.queryDimensionRecords(
2516 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2517 )
2518 self.assertEqual(query5.count(exact=True), 0)
2519 messages = list(query5.explain_no_results())
2520 self.assertFalse(messages)
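    # The summary API exercised above, as a hedged quick reference:
    #
    #     results = registry.queryDataIds(["detector"], instrument="Cam1")
    #     results.any(execute=False, exact=False)   # static check only
    #     results.any(execute=True, exact=True)     # runs the full query
    #     results.count(exact=False)                # fast, may overcount
    #     list(results.explain_no_results())        # human-readable reasons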
2522 def testQueryDataIdsOrderBy(self):
2523 """Test order_by and limit on result returned by queryDataIds()."""
2524 registry = self.makeRegistry()
2525 self.loadData(registry, "base.yaml")
2526 self.loadData(registry, "datasets.yaml")
2527 self.loadData(registry, "spatial.yaml")
2529 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2530 return registry.queryDataIds(
2531 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2532 )
2534 Test = namedtuple(
2535 "testQueryDataIdsOrderByTest",
2536 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2537 defaults=(None, None, None),
2538 )
2540 test_data = (
2541 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2542 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2543 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2544 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2545 Test(
2546 "tract.id,visit.id",
2547 "tract,visit",
2548 ((0, 1), (0, 1), (0, 2)),
2549 limit=(3,),
2550 ),
2551 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2552 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2553 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2554 Test(
2555 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2556 ),
2557 Test(
2558 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2559 ),
2560 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2561 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2562 Test(
2563 "tract,-timespan.begin,timespan.end",
2564 "tract,visit",
2565 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2566 ),
2567 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2568 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2569 Test(
2570 "tract,detector",
2571 "tract,detector",
2572 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2573 datasets="flat",
2574 collections="imported_r",
2575 ),
2576 Test(
2577 "tract,detector.full_name",
2578 "tract,detector",
2579 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2580 datasets="flat",
2581 collections="imported_r",
2582 ),
2583 Test(
2584 "tract,detector.raft,detector.name_in_raft",
2585 "tract,detector",
2586 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2587 datasets="flat",
2588 collections="imported_r",
2589 ),
2590 )
2592 for test in test_data:
2593 order_by = test.order_by.split(",")
2594 keys = test.keys.split(",")
2595 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2596 if test.limit is not None:
2597 query = query.limit(*test.limit)
2598 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2599 self.assertEqual(dataIds, test.result)
2601 # Repeat the same test with a materialized query.
2602 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2603 if test.limit is not None:
2604 query = query.limit(*test.limit)
2605 with query.materialize() as materialized:
2606 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
2607 self.assertEqual(dataIds, test.result)
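        # Hedged cross-check (assumes ascending order on bare dimension keys
        # matches Python's tuple ordering): sorting the unrestricted results
        # by hand reproduces the first table row above.
        expected = tuple(
            sorted(tuple(dataId[k] for k in ("tract", "visit")) for dataId in do_query())
        )
        self.assertEqual(expected, ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2)))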
2609 # Errors in ORDER BY expressions.
2610 for order_by in ("", "-"):
2611 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2612 list(do_query().order_by(order_by))
2614 for order_by in ("undimension.name", "-undimension.name"):
2615 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2616 list(do_query().order_by(order_by))
2618 for order_by in ("attract", "-attract"):
2619 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2620 list(do_query().order_by(order_by))
2622 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2623 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2625 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2626 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2628 with self.assertRaisesRegex(
2629 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2630 ):
2631 list(do_query(("tract",)).order_by("timespan.begin"))
2633 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2634 list(do_query(("tract",)).order_by("tract.timespan.begin"))
2636 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2637 list(do_query(("tract",)).order_by("tract.name"))
2639 def testQueryDataIdsGovernorExceptions(self):
2640 """Test exceptions raised by queryDataIds() for incorrect governors."""
2641 registry = self.makeRegistry()
2642 self.loadData(registry, "base.yaml")
2643 self.loadData(registry, "datasets.yaml")
2644 self.loadData(registry, "spatial.yaml")
2646 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs):
2647 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2649 Test = namedtuple(
2650 "testQueryDataIdExceptionsTest",
2651 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2652 defaults=(None, None, None, {}, None, 0),
2653 )
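        # Each row gives the dimensions to query and the same governor
        # constraint expressed via keyword arguments, a dataId mapping, or a
        # `where` string (optionally with `bind`); `exception` names the
        # error expected for unknown governor values, `count` the expected
        # result size otherwise.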
2655 test_data = (
2656 Test("tract,visit", count=6),
2657 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2658 Test(
2659 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2660 ),
2661 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2662 Test(
2663 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2664 ),
2665 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2666 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2667 Test(
2668 "tract,visit",
2669 where="instrument=cam AND skymap=map",
2670 bind={"cam": "Cam1", "map": "SkyMap1"},
2671 count=6,
2672 ),
2673 Test(
2674 "tract,visit",
2675 where="instrument=cam AND skymap=map",
2676 bind={"cam": "Cam", "map": "SkyMap"},
2677 exception=DataIdValueError,
2678 ),
2679 )
2681 for test in test_data:
2682 dimensions = test.dimensions.split(",")
2683 if test.exception:
2684 with self.assertRaises(test.exception):
2685 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2686 else:
2687 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2688 self.assertEqual(query.count(), test.count)
2690 # Repeat with a materialized query.
2691 if test.exception:
2692 with self.assertRaises(test.exception):
2693 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2694 with query.materialize() as materialized:
2695 materialized.count()
2696 else:
2697 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2698 with query.materialize() as materialized:
2699 self.assertEqual(materialized.count(), test.count)
2701 def testQueryDimensionRecordsOrderBy(self):
2702 """Test order_by and limit on result returned by
2703 queryDimensionRecords().
2704 """
2705 registry = self.makeRegistry()
2706 self.loadData(registry, "base.yaml")
2707 self.loadData(registry, "datasets.yaml")
2708 self.loadData(registry, "spatial.yaml")
2710 def do_query(element, datasets=None, collections=None):
2711 return registry.queryDimensionRecords(
2712 element, instrument="Cam1", datasets=datasets, collections=collections
2713 )
2715 query = do_query("detector")
2716 self.assertEqual(len(list(query)), 4)
2718 Test = namedtuple(
2719 "testQueryDataIdsOrderByTest",
2720 ("element", "order_by", "result", "limit", "datasets", "collections"),
2721 defaults=(None, None, None),
2722 )
2724 test_data = (
2725 Test("detector", "detector", (1, 2, 3, 4)),
2726 Test("detector", "-detector", (4, 3, 2, 1)),
2727 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2728 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2729 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2730 Test("visit", "visit", (1, 2)),
2731 Test("visit", "-visit.id", (2, 1)),
2732 Test("visit", "zenith_angle", (1, 2)),
2733 Test("visit", "-visit.name", (2, 1)),
2734 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2735 )
2737 for test in test_data:
2738 order_by = test.order_by.split(",")
2739 query = do_query(test.element).order_by(*order_by)
2740 if test.limit is not None:
2741 query = query.limit(*test.limit)
2742 dataIds = tuple(rec.id for rec in query)
2743 self.assertEqual(dataIds, test.result)
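        # Hedged cross-check: ordering by the dimension primary key should
        # agree with sorting the returned record ids in Python.
        ids = tuple(record.id for record in do_query("detector").order_by("detector"))
        self.assertEqual(ids, tuple(sorted(ids)))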
2745 # Errors in ORDER BY expressions.
2746 for order_by in ("", "-"):
2747 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2748 list(do_query("detector").order_by(order_by))
2750 for order_by in ("undimension.name", "-undimension.name"):
2751 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
2752 list(do_query("detector").order_by(order_by))
2754 for order_by in ("attract", "-attract"):
2755 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
2756 list(do_query("detector").order_by(order_by))
2758 def testQueryDimensionRecordsExceptions(self):
2759 """Test exceptions raised by queryDimensionRecords()."""
2760 registry = self.makeRegistry()
2761 self.loadData(registry, "base.yaml")
2762 self.loadData(registry, "datasets.yaml")
2763 self.loadData(registry, "spatial.yaml")
2765 result = registry.queryDimensionRecords("detector")
2766 self.assertEqual(result.count(), 4)
2767 result = registry.queryDimensionRecords("detector", instrument="Cam1")
2768 self.assertEqual(result.count(), 4)
2769 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
2770 self.assertEqual(result.count(), 4)
2771 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
2772 self.assertEqual(result.count(), 4)
2773 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
2774 self.assertEqual(result.count(), 4)
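        # The keyword, dataId, where, and bind forms above are equivalent
        # spellings of the same governor constraint; a hedged spot-check that
        # the records themselves agree (detector's full_name field is also
        # used in the ORDER BY tests above):
        names = {
            record.full_name
            for record in registry.queryDimensionRecords("detector", instrument="Cam1")
        }
        self.assertEqual(len(names), 4)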
2776 with self.assertRaisesRegex(
2777 DataIdValueError, "Could not fetch record for required dimension instrument"
2778 ):
2779 registry.queryDimensionRecords("detector", instrument="NotCam1")
2781 with self.assertRaisesRegex(
2782 DataIdValueError, "Could not fetch record for required dimension instrument"
2783 ):
2784 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
2786 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2787 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
2788 result.count()
2790 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2791 result = registry.queryDimensionRecords(
2792 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
2793 )
2794 result.count()
2796 def testDatasetConstrainedDimensionRecordQueries(self):
2797 """Test that queryDimensionRecords works even when given a dataset
2798 constraint whose dimensions extend beyond the requested dimension
2799 element's.
2800 """
2801 registry = self.makeRegistry()
2802 self.loadData(registry, "base.yaml")
2803 self.loadData(registry, "datasets.yaml")
2804 # Query for physical_filter dimension records, using a dataset type
2805 # (flat) whose dimensions include detector as well as physical_filter.
2806 records = registry.queryDimensionRecords(
2807 "physical_filter",
2808 datasets=["flat"],
2809 collections="imported_r",
2810 )
2811 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
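        # Hedged follow-up: the dataset constraint can only narrow the result
        # set, so the same query without it should return a superset of those
        # physical_filter names.
        all_filters = {record.name for record in registry.queryDimensionRecords("physical_filter")}
        self.assertTrue({"Cam1-R1", "Cam1-R2"} <= all_filters)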