Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value for
    this member, it overrides the name specified in the default configuration
    (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
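        # RegistryConfig (like other daf_butler Config objects) accepts tuple
        # keys for nested access, so ("managers", "collections") below
        # addresses the managers.collections entry of the configuration tree.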
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
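            # Passing datastore=None imports only registry content (dimension
            # records, collections, dataset entries); no datastore ingest is
            # attempted.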
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
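        # count() and any() are checked explicitly because the lazy results
        # objects may implement them with dedicated queries (e.g. a COUNT or
        # LIMIT 1 query) rather than by iterating over all rows.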
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters. SQLite documents the limit as 32k, but it
        # looks like it is much higher in practice.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check a valid insert.
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True.
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work...
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # ...except when the definitions are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # A missing required dependency ("instrument") should fail.
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure.
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
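        # syncDimensionData returns True when it inserts a new record and
        # False when an equivalent record is already present.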
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral.
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises.
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None.
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes:
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error.
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test the non-unique ID generation modes; such datasets can be
        # re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
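                # Version 5 UUIDs are deterministic (name-based, SHA-1), so
                # these modes reproduce the same ID for the same inputs;
                # random uuid4 IDs would have version 4.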

                # Importing it again is OK.
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run.
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run.
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make a new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting the same dataId with a different dataset ID is an error.
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting a different dataId with the same dataset ID should work.
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
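        # Component dataset types are named "<parent>.<component>", so
        # "bias.wcs" is the wcs component of the parent "bias" dataset type.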
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't yet in the collection and wouldn't itself
        # cause a conflict. This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempting to set its child collections to something circular should
        # fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
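        # A CHAINED collection is searched in order: lookups in chain1 try
        # tag1 first, then run2, and return the first matching dataset.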
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 in chain2 should find it via tag1
        # (recursing through chain1), because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should also be found through
        # chain2, which includes run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
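        # With flatten=False a CHAINED collection may be nested as a child;
        # with flatten=True any CHAINED children are replaced by their own
        # (recursively flattened) children before the chain is stored.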
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
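                    # savepoint=True wraps this inner block in a SQL SAVEPOINT,
                    # so the conflicting insert below rolls back only to the
                    # savepoint; the outer transaction (with dataId1) survives.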
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types and collections.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
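            # Dimension packers encode a data ID into a single integer and
            # decode it back; pack/unpack should round-trip, and the two
            # packers should use distinct encodings.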
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # The second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter: it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types.
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
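                # The registry's spatial join should reproduce exactly the
                # same overlap pairs as the brute-force region comparison.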
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regions.items():
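                # envelope() returns ranges of sky-pixel indices whose pixels
                # may overlap the region; expand each (begin, end) range into
                # the individual indices.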
1227 for begin, end in commonSkyPix.pixelization.envelope(region):
1228 expected.update(
1229 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
1230 for index in range(begin, end)
1231 )
1232 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1233 queried = set(registry.queryDataIds(graph))
1234 self.assertEqual(expected, queried)
1236 def testAbstractQuery(self):
1237 """Test that we can run a query that just lists the known
1238 bands. This is tricky because band is
1239 backed by a query against physical_filter.
1240 """
1241 registry = self.makeRegistry()
1242 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1243 registry.insertDimensionData(
1244 "physical_filter",
1245 dict(instrument="DummyCam", name="dummy_i", band="i"),
1246 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1247 dict(instrument="DummyCam", name="dummy_r", band="r"),
1248 )
1249 rows = registry.queryDataIds(["band"]).toSet()
1250 self.assertCountEqual(
1251 rows,
1252 [
1253 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1254 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1255 ],
1256 )
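# A one-function sketch (illustrative, not from the test suite) of the
# deduplication the assertion above relies on: because band is backed by
# a query against physical_filter, each band must appear exactly once no
# matter how many physical filters share it.
def _distinct_bands_sketch(registry) -> set:
    return {data_id["band"] for data_id in registry.queryDataIds(["band"])}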
1258 def testAttributeManager(self):
1259 """Test basic functionality of attribute manager."""
1260 # Number of attribute records with schema versions in a fresh database:
1261 # 6 managers with 3 records per manager, plus the dimensions config.
1262 VERSION_COUNT = 6 * 3 + 1
1264 registry = self.makeRegistry()
1265 attributes = registry._managers.attributes
1267 # check what get() returns for a non-existent key
1268 self.assertIsNone(attributes.get("attr"))
1269 self.assertEqual(attributes.get("attr", ""), "")
1270 self.assertEqual(attributes.get("attr", "Value"), "Value")
1271 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1273 # cannot store empty key or value
1274 with self.assertRaises(ValueError):
1275 attributes.set("", "value")
1276 with self.assertRaises(ValueError):
1277 attributes.set("attr", "")
1279 # set value of a non-existent key
1280 attributes.set("attr", "value")
1281 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1282 self.assertEqual(attributes.get("attr"), "value")
1284 # update value of existing key
1285 with self.assertRaises(ButlerAttributeExistsError):
1286 attributes.set("attr", "value2")
1288 attributes.set("attr", "value2", force=True)
1289 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1290 self.assertEqual(attributes.get("attr"), "value2")
1292 # delete existing key
1293 self.assertTrue(attributes.delete("attr"))
1294 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1296 # delete a non-existent key
1297 self.assertFalse(attributes.delete("non-attr"))
1299 # store a bunch of keys and read them all back
1300 data = [
1301 ("version.core", "1.2.3"),
1302 ("version.dimensions", "3.2.1"),
1303 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1304 ]
1305 for key, value in data:
1306 attributes.set(key, value)
1307 items = dict(attributes.items())
1308 for key, value in data:
1309 self.assertEqual(items[key], value)
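# A hedged usage sketch of the attribute-manager API exercised above: a
# persistent str-to-str mapping where overwriting an existing key needs
# force=True. The key name below is illustrative only.
def _attributes_roundtrip_sketch(attributes) -> str:
    attributes.set("example.key", "v1")
    attributes.set("example.key", "v2", force=True)  # plain set() would raise here
    value = attributes.get("example.key")
    attributes.delete("example.key")
    return value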
1311 def testQueryDatasetsDeduplication(self):
1312 """Test that the findFirst option to queryDatasets selects datasets
1313 from collections in the order given.
1314 """
1315 registry = self.makeRegistry()
1316 self.loadData(registry, "base.yaml")
1317 self.loadData(registry, "datasets.yaml")
1318 self.assertCountEqual(
1319 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1320 [
1321 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1322 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1323 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1324 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1325 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1326 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1327 ],
1328 )
1329 self.assertCountEqual(
1330 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1331 [
1332 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1333 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1334 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1335 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1336 ],
1337 )
1338 self.assertCountEqual(
1339 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1340 [
1341 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1342 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1343 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1344 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1345 ],
1346 )
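# A sketch (not from the original tests) of the findFirst contract
# checked above: each returned ref should agree with a findDataset
# lookup using the same ordered collection list. Assumes the "bias"
# datasets loaded by this test.
def _find_first_agrees_sketch(registry, collections) -> bool:
    found = set(registry.queryDatasets("bias", collections=collections, findFirst=True))
    looked_up = {registry.findDataset("bias", ref.dataId, collections=collections) for ref in found}
    return found == looked_up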
1348 def testQueryResults(self):
1349 """Test querying for data IDs and then manipulating the QueryResults
1350 object returned to perform other queries.
1351 """
1352 registry = self.makeRegistry()
1353 self.loadData(registry, "base.yaml")
1354 self.loadData(registry, "datasets.yaml")
1355 bias = registry.getDatasetType("bias")
1356 flat = registry.getDatasetType("flat")
1357 # Obtain expected results from methods other than those we're testing
1358 # here. That includes:
1359 # - the dimensions of the data IDs we want to query:
1360 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1361 # - the dimensions of some other data IDs we'll extract from that:
1362 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1363 # - the data IDs we expect to obtain from the first queries:
1364 expectedDataIds = DataCoordinateSet(
1365 {
1366 DataCoordinate.standardize(
1367 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1368 )
1369 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1370 },
1371 graph=expectedGraph,
1372 hasFull=False,
1373 hasRecords=False,
1374 )
1375 # - the flat datasets we expect to find from those data IDs, in just
1376 # one collection (so deduplication is irrelevant):
1377 expectedFlats = [
1378 registry.findDataset(
1379 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1380 ),
1381 registry.findDataset(
1382 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1383 ),
1384 registry.findDataset(
1385 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1386 ),
1387 ]
1388 # - the data IDs we expect to extract from that:
1389 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1390 # - the bias datasets we expect to find from those data IDs, after we
1391 # subset out the physical_filter dimension, first with duplicates:
1392 expectedAllBiases = [
1393 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1394 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1395 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1396 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1397 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1398 ]
1399 # - ...and without duplicates:
1400 expectedDeduplicatedBiases = [
1401 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1402 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1403 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1404 ]
1405 # Test against those expected results, using a "lazy" query for the
1406 # data IDs (which re-executes that query each time we use it to do
1407 # something new).
1408 dataIds = registry.queryDataIds(
1409 ["detector", "physical_filter"],
1410 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1411 instrument="Cam1",
1412 )
1413 self.assertEqual(dataIds.graph, expectedGraph)
1414 self.assertEqual(dataIds.toSet(), expectedDataIds)
1415 self.assertCountEqual(
1416 list(
1417 dataIds.findDatasets(
1418 flat,
1419 collections=["imported_r"],
1420 )
1421 ),
1422 expectedFlats,
1423 )
1424 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1425 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1426 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1427 self.assertCountEqual(
1428 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1429 expectedAllBiases,
1430 )
1431 self.assertCountEqual(
1432 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1433 expectedDeduplicatedBiases,
1434 )
1435 # Materialize the bias dataset queries (only) by putting the results
1436 # into temporary tables, then repeat those tests.
1437 with subsetDataIds.findDatasets(
1438 bias, collections=["imported_r", "imported_g"], findFirst=False
1439 ).materialize() as biases:
1440 self.assertCountEqual(list(biases), expectedAllBiases)
1441 with subsetDataIds.findDatasets(
1442 bias, collections=["imported_r", "imported_g"], findFirst=True
1443 ).materialize() as biases:
1444 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1445 # Materialize the data ID subset query, but not the dataset queries.
1446 with subsetDataIds.materialize() as subsetDataIds:
1447 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1448 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1449 self.assertCountEqual(
1450 list(
1451 subsetDataIds.findDatasets(
1452 bias, collections=["imported_r", "imported_g"], findFirst=False
1453 )
1454 ),
1455 expectedAllBiases,
1456 )
1457 self.assertCountEqual(
1458 list(
1459 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1460 ),
1461 expectedDeduplicatedBiases,
1462 )
1463 # Materialize the dataset queries, too.
1464 with subsetDataIds.findDatasets(
1465 bias, collections=["imported_r", "imported_g"], findFirst=False
1466 ).materialize() as biases:
1467 self.assertCountEqual(list(biases), expectedAllBiases)
1468 with subsetDataIds.findDatasets(
1469 bias, collections=["imported_r", "imported_g"], findFirst=True
1470 ).materialize() as biases:
1471 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1472 # Materialize the original query, but none of the follow-up queries.
1473 with dataIds.materialize() as dataIds:
1474 self.assertEqual(dataIds.graph, expectedGraph)
1475 self.assertEqual(dataIds.toSet(), expectedDataIds)
1476 self.assertCountEqual(
1477 list(
1478 dataIds.findDatasets(
1479 flat,
1480 collections=["imported_r"],
1481 )
1482 ),
1483 expectedFlats,
1484 )
1485 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1486 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1487 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1488 self.assertCountEqual(
1489 list(
1490 subsetDataIds.findDatasets(
1491 bias, collections=["imported_r", "imported_g"], findFirst=False
1492 )
1493 ),
1494 expectedAllBiases,
1495 )
1496 self.assertCountEqual(
1497 list(
1498 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1499 ),
1500 expectedDeduplicatedBiases,
1501 )
1502 # Materialize just the bias dataset queries.
1503 with subsetDataIds.findDatasets(
1504 bias, collections=["imported_r", "imported_g"], findFirst=False
1505 ).materialize() as biases:
1506 self.assertCountEqual(list(biases), expectedAllBiases)
1507 with subsetDataIds.findDatasets(
1508 bias, collections=["imported_r", "imported_g"], findFirst=True
1509 ).materialize() as biases:
1510 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1511 # Materialize the subset data ID query, but not the dataset
1512 # queries.
1513 with subsetDataIds.materialize() as subsetDataIds:
1514 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1515 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1516 self.assertCountEqual(
1517 list(
1518 subsetDataIds.findDatasets(
1519 bias, collections=["imported_r", "imported_g"], findFirst=False
1520 )
1521 ),
1522 expectedAllBiases,
1523 )
1524 self.assertCountEqual(
1525 list(
1526 subsetDataIds.findDatasets(
1527 bias, collections=["imported_r", "imported_g"], findFirst=True
1528 )
1529 ),
1530 expectedDeduplicatedBiases,
1531 )
1532 # Materialize the bias dataset queries, too, so now we're
1533 # materializing every single step.
1534 with subsetDataIds.findDatasets(
1535 bias, collections=["imported_r", "imported_g"], findFirst=False
1536 ).materialize() as biases:
1537 self.assertCountEqual(list(biases), expectedAllBiases)
1538 with subsetDataIds.findDatasets(
1539 bias, collections=["imported_r", "imported_g"], findFirst=True
1540 ).materialize() as biases:
1541 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
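# A minimal sketch of the materialize() pattern exercised above: the
# context manager executes the query into a temporary table, so
# follow-up operations read that table instead of re-running the query.
# Assumes the "Cam1" test data loaded by this test.
def _materialize_count_sketch(registry) -> int:
    data_ids = registry.queryDataIds(["detector"], instrument="Cam1")
    with data_ids.materialize() as materialized:
        return materialized.count(exact=True)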
1543 def testEmptyDimensionsQueries(self):
1544 """Test Query and QueryResults objects in the case where there are no
1545 dimensions.
1546 """
1547 # Set up test data: one dataset type, two runs, one dataset in each.
1548 registry = self.makeRegistry()
1549 self.loadData(registry, "base.yaml")
1550 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1551 registry.registerDatasetType(schema)
1552 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1553 run1 = "run1"
1554 run2 = "run2"
1555 registry.registerRun(run1)
1556 registry.registerRun(run2)
1557 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1558 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1559 # Query directly for both of the datasets, and each one, one at a time.
1560 self.checkQueryResults(
1561 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1562 )
1563 self.checkQueryResults(
1564 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1565 [dataset1],
1566 )
1567 self.checkQueryResults(
1568 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1569 [dataset2],
1570 )
1571 # Query for data IDs with no dimensions.
1572 dataIds = registry.queryDataIds([])
1573 self.checkQueryResults(dataIds, [dataId])
1574 # Use queried data IDs to find the datasets.
1575 self.checkQueryResults(
1576 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1577 [dataset1, dataset2],
1578 )
1579 self.checkQueryResults(
1580 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1581 [dataset1],
1582 )
1583 self.checkQueryResults(
1584 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1585 [dataset2],
1586 )
1587 # Now materialize the data ID query results and repeat those tests.
1588 with dataIds.materialize() as dataIds:
1589 self.checkQueryResults(dataIds, [dataId])
1590 self.checkQueryResults(
1591 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1592 [dataset1],
1593 )
1594 self.checkQueryResults(
1595 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1596 [dataset2],
1597 )
1598 # Query for non-empty data IDs, then subset that to get the empty one.
1599 # Repeat the above tests starting from that.
1600 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1601 self.checkQueryResults(dataIds, [dataId])
1602 self.checkQueryResults(
1603 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1604 [dataset1, dataset2],
1605 )
1606 self.checkQueryResults(
1607 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1608 [dataset1],
1609 )
1610 self.checkQueryResults(
1611 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1612 [dataset2],
1613 )
1614 with dataIds.materialize() as dataIds:
1615 self.checkQueryResults(dataIds, [dataId])
1616 self.checkQueryResults(
1617 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1618 [dataset1, dataset2],
1619 )
1620 self.checkQueryResults(
1621 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1622 [dataset1],
1623 )
1624 self.checkQueryResults(
1625 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1626 [dataset2],
1627 )
1628 # Query for non-empty data IDs, then materialize, then subset to get
1629 # the empty one. Repeat again.
1630 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1631 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1632 self.checkQueryResults(dataIds, [dataId])
1633 self.checkQueryResults(
1634 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1635 [dataset1, dataset2],
1636 )
1637 self.checkQueryResults(
1638 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1639 [dataset1],
1640 )
1641 self.checkQueryResults(
1642 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1643 [dataset2],
1644 )
1645 with dataIds.materialize() as dataIds:
1646 self.checkQueryResults(dataIds, [dataId])
1647 self.checkQueryResults(
1648 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1649 [dataset1, dataset2],
1650 )
1651 self.checkQueryResults(
1652 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1653 [dataset1],
1654 )
1655 self.checkQueryResults(
1656 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1657 [dataset2],
1658 )
1659 # Query for non-empty data IDs with a constraint on an empty-data-ID
1660 # dataset that exists.
1661 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1662 self.checkQueryResults(
1663 dataIds.subset(unique=True),
1664 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1665 )
1666 # Again query for non-empty data IDs with a constraint on empty-data-ID
1667 # datasets, but now for datasets that don't exist. We delete the existing
1668 # dataset and query just that collection, rather than creating a new
1669 # empty collection, because the former is less likely to be shortcut
1670 # out by our query-building logic (via the collection summaries), and
1671 # such a shortcut would make this test a bit more trivial than we'd like.
1672 registry.removeDatasets([dataset2])
1673 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1674 self.checkQueryResults(dataIds, [])
1676 def testDimensionDataModifications(self):
1677 """Test that modifying dimension records via:
1678 syncDimensionData(..., update=True) and
1679 insertDimensionData(..., replace=True) works as expected, even in the
1680 presence of datasets using those dimensions and spatial overlap
1681 relationships.
1682 """
1684 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1685 """Unpack a sphgeom.RangeSet into the integers it contains."""
1686 for begin, end in ranges:
1687 yield from range(begin, end)
1689 def range_set_hull(
1690 ranges: lsst.sphgeom.RangeSet,
1691 pixelization: lsst.sphgeom.HtmPixelization,
1692 ) -> lsst.sphgeom.ConvexPolygon:
1693 """Create a ConvexPolygon hull of the region defined by a set of
1694 HTM pixelization index ranges.
1695 """
1696 points = []
1697 for index in unpack_range_set(ranges):
1698 points.extend(pixelization.triangle(index).getVertices())
1699 return lsst.sphgeom.ConvexPolygon(points)
1701 # Use HTM to set up an initial parent region (one arbitrary trixel)
1702 # and four child regions (the trixels within the parent at the next
1703 # level). We'll use the parent as a tract/visit region and the children
1704 # as its patch/visit_detector regions.
1705 registry = self.makeRegistry()
1706 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1707 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1708 index = 12288
1709 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1710 assert htm6.universe().contains(child_ranges_small)
1711 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1712 parent_region_small = lsst.sphgeom.ConvexPolygon(
1713 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1714 )
1715 assert all(parent_region_small.contains(c) for c in child_regions_small)
1716 # Make a larger version of each child region, defined to be the set of
1717 # htm6 trixels that overlap the original's bounding circle. Make a new
1718 # parent that's the convex hull of the new children.
1719 child_regions_large = [
1720 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1721 ]
1722 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1723 parent_region_large = lsst.sphgeom.ConvexPolygon(
1724 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1725 )
1726 assert all(parent_region_large.contains(c) for c in child_regions_large)
1727 assert parent_region_large.contains(parent_region_small)
1728 assert not parent_region_small.contains(parent_region_large)
1729 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1730 # Find some commonSkyPix indices that overlap the large regions but do
1731 # not overlap the small regions. We use commonSkyPix here to make sure the
1732 # real tests later involve what's in the database, not just post-query
1733 # region filtering.
1734 child_difference_indices = []
1735 for large, small in zip(child_regions_large, child_regions_small):
1736 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1737 assert difference, "if this is empty, we can't test anything useful with these regions"
1738 assert all(
1739 not commonSkyPix.triangle(d).isDisjointFrom(large)
1740 and commonSkyPix.triangle(d).isDisjointFrom(small)
1741 for d in difference
1742 )
1743 child_difference_indices.append(difference)
1744 parent_difference_indices = list(
1745 unpack_range_set(
1746 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1747 )
1748 )
1749 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1750 assert all(
1751 (
1752 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1753 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1754 )
1755 for d in parent_difference_indices
1756 )
1757 # Now that we've finally got those regions, we'll insert the large ones
1758 # as tract/patch dimension records.
1759 skymap_name = "testing_v1"
1760 registry.insertDimensionData(
1761 "skymap",
1762 {
1763 "name": skymap_name,
1764 "hash": bytes([42]),
1765 "tract_max": 1,
1766 "patch_nx_max": 2,
1767 "patch_ny_max": 2,
1768 },
1769 )
1770 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1771 registry.insertDimensionData(
1772 "patch",
1773 *[
1774 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1775 for n, c in enumerate(child_regions_large)
1776 ],
1777 )
1778 # Add a dataset that uses these dimensions to make sure that modifying
1779 # them doesn't disrupt foreign keys (we need to be sure the DB doesn't
1780 # implement insert with replace=True as delete-then-insert).
1781 dataset_type = DatasetType(
1782 "coadd",
1783 dimensions=["tract", "patch"],
1784 universe=registry.dimensions,
1785 storageClass="Exposure",
1786 )
1787 registry.registerDatasetType(dataset_type)
1788 registry.registerCollection("the_run", CollectionType.RUN)
1789 registry.insertDatasets(
1790 dataset_type,
1791 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1792 run="the_run",
1793 )
1794 # Query for tracts and patches that overlap some "difference"
1795 # commonSkyPix pixels; there should be overlaps, because the database
1796 # has the "large" suite of regions.
1797 self.assertEqual(
1798 {0},
1799 {
1800 data_id["tract"]
1801 for data_id in registry.queryDataIds(
1802 ["tract"],
1803 skymap=skymap_name,
1804 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1805 )
1806 },
1807 )
1808 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1809 self.assertIn(
1810 patch_id,
1811 {
1812 data_id["patch"]
1813 for data_id in registry.queryDataIds(
1814 ["patch"],
1815 skymap=skymap_name,
1816 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1817 )
1818 },
1819 )
1820 # Use sync to update the tract region and insert to update the patch
1821 # regions, to the "small" suite.
1822 updated = registry.syncDimensionData(
1823 "tract",
1824 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1825 update=True,
1826 )
1827 self.assertEqual(updated, {"region": parent_region_large})
1828 registry.insertDimensionData(
1829 "patch",
1830 *[
1831 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1832 for n, c in enumerate(child_regions_small)
1833 ],
1834 replace=True,
1835 )
1836 # Query again; there should now be no such overlaps, because the
1837 # database has the "small" suite of regions.
1838 self.assertFalse(
1839 set(
1840 registry.queryDataIds(
1841 ["tract"],
1842 skymap=skymap_name,
1843 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1844 )
1845 )
1846 )
1847 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1848 self.assertNotIn(
1849 patch_id,
1850 {
1851 data_id["patch"]
1852 for data_id in registry.queryDataIds(
1853 ["patch"],
1854 skymap=skymap_name,
1855 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1856 )
1857 },
1858 )
1859 # Update back to the large regions and query one more time.
1860 updated = registry.syncDimensionData(
1861 "tract",
1862 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1863 update=True,
1864 )
1865 self.assertEqual(updated, {"region": parent_region_small})
1866 registry.insertDimensionData(
1867 "patch",
1868 *[
1869 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1870 for n, c in enumerate(child_regions_large)
1871 ],
1872 replace=True,
1873 )
1874 self.assertEqual(
1875 {0},
1876 {
1877 data_id["tract"]
1878 for data_id in registry.queryDataIds(
1879 ["tract"],
1880 skymap=skymap_name,
1881 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1882 )
1883 },
1884 )
1885 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1886 self.assertIn(
1887 patch_id,
1888 {
1889 data_id["patch"]
1890 for data_id in registry.queryDataIds(
1891 ["patch"],
1892 skymap=skymap_name,
1893 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1894 )
1895 },
1896 )
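# A sketch of the sync-then-check idiom above (illustrative only): with
# update=True, syncDimensionData returns a mapping from each updated
# field to its *previous* value, which is what the assertions compare
# against.
def _sync_tract_region_sketch(registry, skymap_name, new_region):
    previous = registry.syncDimensionData(
        "tract",
        {"skymap": skymap_name, "id": 0, "region": new_region},
        update=True,
    )
    return previous  # e.g. {"region": <old region>} when the region changed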
1898 def testCalibrationCollections(self):
1899 """Test operations on `~CollectionType.CALIBRATION` collections,
1900 including `Registry.certify`, `Registry.decertify`, and
1901 `Registry.findDataset`.
1902 """
1903 # Set up: make a Registry and fill it with some datasets in
1904 # non-calibration collections.
1905 registry = self.makeRegistry()
1906 self.loadData(registry, "base.yaml")
1907 self.loadData(registry, "datasets.yaml")
1908 # Set up some timestamps.
1909 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
1910 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
1911 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
1912 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
1913 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
1914 allTimespans = [
1915 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1916 ]
1917 # Get references to some datasets.
1918 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1919 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1920 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1921 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1922 # Register the main calibration collection we'll be working with.
1923 collection = "Cam1/calibs/default"
1924 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1925 # Cannot associate into a calibration collection (no timespan).
1926 with self.assertRaises(CollectionTypeError):
1927 registry.associate(collection, [bias2a])
1928 # Certify 2a dataset with [t2, t4) validity.
1929 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1930 # Test that we can query for this dataset via the new collection, both
1931 # on its own and with a RUN collection, as long as we don't try to join
1932 # in temporal dimensions or use findFirst=True.
1933 self.assertEqual(
1934 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
1935 {bias2a},
1936 )
1937 self.assertEqual(
1938 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
1939 {
1940 bias2a,
1941 bias2b,
1942 bias3b,
1943 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1944 },
1945 )
1946 self.assertEqual(
1947 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
1948 {registry.expandDataId(instrument="Cam1", detector=2)},
1949 )
1950 self.assertEqual(
1951 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
1952 {
1953 registry.expandDataId(instrument="Cam1", detector=2),
1954 registry.expandDataId(instrument="Cam1", detector=3),
1955 registry.expandDataId(instrument="Cam1", detector=4),
1956 },
1957 )
1959 # We should not be able to certify 2b with anything overlapping that
1960 # window.
1961 with self.assertRaises(ConflictingDefinitionError):
1962 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1963 with self.assertRaises(ConflictingDefinitionError):
1964 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1965 with self.assertRaises(ConflictingDefinitionError):
1966 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1967 with self.assertRaises(ConflictingDefinitionError):
1968 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1969 with self.assertRaises(ConflictingDefinitionError):
1970 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1971 with self.assertRaises(ConflictingDefinitionError):
1972 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1973 with self.assertRaises(ConflictingDefinitionError):
1974 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1975 with self.assertRaises(ConflictingDefinitionError):
1976 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1977 # We should be able to certify 3a with a range overlapping that window,
1978 # because it's for a different detector.
1979 # We'll certify 3a over [t1, t3).
1980 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1981 # Now we'll certify 2b and 3b together over [t4, ∞).
1982 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1984 # Fetch all associations and check that they are what we expect.
1985 self.assertCountEqual(
1986 list(
1987 registry.queryDatasetAssociations(
1988 "bias",
1989 collections=[collection, "imported_g", "imported_r"],
1990 )
1991 ),
1992 [
1993 DatasetAssociation(
1994 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1995 collection="imported_g",
1996 timespan=None,
1997 ),
1998 DatasetAssociation(
1999 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2000 collection="imported_r",
2001 timespan=None,
2002 ),
2003 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2004 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2005 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2006 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2007 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2008 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2009 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2010 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2011 ],
2012 )
2014 class Ambiguous:
2015 """Tag class to denote lookups that should be ambiguous."""
2017 pass
2019 def assertLookup(
2020 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2021 ) -> None:
2022 """Local function that asserts that a bias lookup returns the given
2023 expected result.
2024 """
2025 if expected is Ambiguous:
2026 with self.assertRaises(RuntimeError):
2027 registry.findDataset(
2028 "bias",
2029 collections=collection,
2030 instrument="Cam1",
2031 detector=detector,
2032 timespan=timespan,
2033 )
2034 else:
2035 self.assertEqual(
2036 expected,
2037 registry.findDataset(
2038 "bias",
2039 collections=collection,
2040 instrument="Cam1",
2041 detector=detector,
2042 timespan=timespan,
2043 ),
2044 )
2046 # Systematically test lookups against expected results.
2047 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2048 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2049 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2050 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2051 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2052 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2053 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2054 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2055 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2056 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2057 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2058 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2059 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2060 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2061 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2062 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2063 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2064 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2065 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2066 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2067 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2068 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2069 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2070 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2071 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2072 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2073 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2074 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2075 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2076 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2077 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2078 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2079 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2080 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2081 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2082 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2083 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2084 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2085 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2086 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2087 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2088 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2090 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2091 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2092 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2093 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2094 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2095 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2096 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2097 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2098 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2099 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2100 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2101 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2102 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2103 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2104 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2105 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2106 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2107 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2108 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2109 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2110 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2111 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2112 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2113 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2114 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2115 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2116 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2117 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2118 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2119 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2120 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2121 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2122 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2123 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2124 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2125 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2126 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2127 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2128 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2129 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2130 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2131 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2132 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2133 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2134 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2135 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2137 # Decertify everything, this time with explicit data IDs, then check
2138 # that no lookups succeed.
2139 registry.decertify(
2140 collection,
2141 "bias",
2142 Timespan(None, None),
2143 dataIds=[
2144 dict(instrument="Cam1", detector=2),
2145 dict(instrument="Cam1", detector=3),
2146 ],
2147 )
2148 for detector in (2, 3):
2149 for timespan in allTimespans:
2150 assertLookup(detector=detector, timespan=timespan, expected=None)
2151 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2152 # those.
2153 registry.certify(
2154 collection,
2155 [bias2a, bias3a],
2156 Timespan(None, None),
2157 )
2158 for timespan in allTimespans:
2159 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2160 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2161 # Decertify just bias2 over [t2, t4).
2162 # This should split a single certification row into two (and leave the
2163 # other existing row, for bias3a, alone).
2164 registry.decertify(
2165 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2166 )
2167 for timespan in allTimespans:
2168 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2169 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2170 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2171 if overlapsBefore and overlapsAfter:
2172 expected = Ambiguous
2173 elif overlapsBefore or overlapsAfter:
2174 expected = bias2a
2175 else:
2176 expected = None
2177 assertLookup(detector=2, timespan=timespan, expected=expected)
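# A compact restatement (not part of the tests) of the lookup rule that
# assertLookup encodes: findDataset in a CALIBRATION collection takes a
# timespan, returns the unique certified dataset overlapping it, returns
# None when nothing overlaps, and raises RuntimeError when several do.
def _calibration_lookup_sketch(registry, collection, timespan):
    return registry.findDataset(
        "bias", collections=collection, instrument="Cam1", detector=2, timespan=timespan
    )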
2179 def testSkipCalibs(self):
2180 """Test how queries handle skipping of calibration collections."""
2181 registry = self.makeRegistry()
2182 self.loadData(registry, "base.yaml")
2183 self.loadData(registry, "datasets.yaml")
2185 coll_calib = "Cam1/calibs/default"
2186 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2188 # Add all biases to the calibration collection.
2189 # Without this, the logic that prunes dataset subqueries based on
2190 # datasetType-collection summary information will fire before the logic
2191 # we want to test below. This is a good thing (it avoids the dreaded
2192 # NotImplementedError a bit more often) everywhere but here.
2193 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2195 coll_list = [coll_calib, "imported_g", "imported_r"]
2196 chain = "Cam1/chain"
2197 registry.registerCollection(chain, type=CollectionType.CHAINED)
2198 registry.setCollectionChain(chain, coll_list)
2200 # An explicit collection list will raise if findFirst=True or if
2201 # temporal dimensions are involved.
2202 with self.assertRaises(NotImplementedError):
2203 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2204 with self.assertRaises(NotImplementedError):
2205 registry.queryDataIds(
2206 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2207 ).count()
2209 # Searching via the chain will skip the calibration collection instead.
2210 datasets = list(registry.queryDatasets("bias", collections=chain))
2211 self.assertGreater(len(datasets), 0)
2213 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2214 self.assertGreater(len(dataIds), 0)
2216 # A glob pattern will skip it too.
2217 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2218 self.assertGreater(len(datasets), 0)
2220 # A regular expression will skip it too.
2221 pattern = re.compile(".*")
2222 datasets = list(registry.queryDatasets("bias", collections=pattern))
2223 self.assertGreater(len(datasets), 0)
2225 # ellipsis should work as usual
2226 datasets = list(registry.queryDatasets("bias", collections=...))
2227 self.assertGreater(len(datasets), 0)
2229 # A few tests with findFirst.
2230 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2231 self.assertGreater(len(datasets), 0)
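# A hedged summary of the skipping rules above, as code: searching a
# CHAINED collection (or a glob/regex/ellipsis pattern) silently skips
# CALIBRATION members in unsupported contexts such as findFirst, whereas
# naming one explicitly in such a context raises NotImplementedError.
def _skip_calibs_sketch(registry, chain):
    return list(registry.queryDatasets("bias", collections=chain, findFirst=True))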
2233 def testIngestTimeQuery(self):
"""Test that `where` expressions can constrain dataset ingest_date."""
2235 registry = self.makeRegistry()
2236 self.loadData(registry, "base.yaml")
2237 dt0 = datetime.utcnow()
2238 self.loadData(registry, "datasets.yaml")
2239 dt1 = datetime.utcnow()
2241 datasets = list(registry.queryDatasets(..., collections=...))
2242 len0 = len(datasets)
2243 self.assertGreater(len0, 0)
2245 where = "ingest_date > T'2000-01-01'"
2246 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2247 len1 = len(datasets)
2248 self.assertEqual(len0, len1)
2250 # A cutoff far in the future should match nothing (no one will be using this software in 2050)
2251 where = "ingest_date > T'2050-01-01'"
2252 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2253 len2 = len(datasets)
2254 self.assertEqual(len2, 0)
2256 # Check more exact timing to make sure there is no 37-second offset
2257 # (fixed on DM-30124). SQLite time precision is 1 second, so make
2258 # sure that we don't test with higher precision.
2259 tests = [
2260 # format: (timestamp, operator, expected_len)
2261 (dt0 - timedelta(seconds=1), ">", len0),
2262 (dt0 - timedelta(seconds=1), "<", 0),
2263 (dt1 + timedelta(seconds=1), "<", len0),
2264 (dt1 + timedelta(seconds=1), ">", 0),
2265 ]
2266 for dt, op, expect_len in tests:
2267 dt_str = dt.isoformat(sep=" ")
2269 where = f"ingest_date {op} T'{dt_str}'"
2270 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2271 self.assertEqual(len(datasets), expect_len)
2273 # same queries with bind, passing the cutoff as datetime or astropy Time
2274 where = f"ingest_date {op} ingest_time"
2275 datasets = list(
2276 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2277 )
2278 self.assertEqual(len(datasets), expect_len)
2280 dt_astropy = astropy.time.Time(dt, format="datetime")
2281 datasets = list(
2282 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2283 )
2284 self.assertEqual(len(datasets), expect_len)
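# A sketch of the two equivalent ways to constrain ingest_date shown
# above: an inline T'...' literal, or a bound value (datetime or
# astropy.time.Time) referenced by name in the expression. The name
# "cutoff" is illustrative only.
def _ingest_date_sketch(registry, cutoff):
    return list(
        registry.queryDatasets(
            ..., collections=..., where="ingest_date > cutoff", bind={"cutoff": cutoff}
        )
    )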
2286 def testTimespanQueries(self):
2287 """Test query expressions involving timespans."""
2288 registry = self.makeRegistry()
2289 self.loadData(registry, "hsc-rc2-subset.yaml")
2290 # All visits in the database; mapping from ID to timespan.
2291 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2292 # Just those IDs, sorted (which is also temporal sorting, because HSC
2293 # visit IDs are monotonically increasing).
2294 ids = sorted(visits.keys())
2295 self.assertGreater(len(ids), 20)
2296 # Pick some quasi-random indexes into `ids` to play with.
2297 i1 = int(len(ids) * 0.1)
2298 i2 = int(len(ids) * 0.3)
2299 i3 = int(len(ids) * 0.6)
2300 i4 = int(len(ids) * 0.8)
2301 # Extract some times from those: just before the beginning of i1 (which
2302 # should be after the end of the previous visit), exactly the
2303 # beginning of i2, just after the beginning of i3 (and before its end),
2304 # and the exact end of i4.
2305 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2306 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2307 t2 = visits[ids[i2]].begin
2308 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2309 self.assertLess(t3, visits[ids[i3]].end)
2310 t4 = visits[ids[i4]].end
2311 # Make sure those are actually in order.
2312 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2314 bind = {
2315 "t1": t1,
2316 "t2": t2,
2317 "t3": t3,
2318 "t4": t4,
2319 "ts23": Timespan(t2, t3),
2320 }
2322 def query(where):
2323 """Helper function that queries for visit data IDs and returns
2324 results as a sorted, deduplicated list of visit IDs.
2325 """
2326 return sorted(
2327 {
2328 dataId["visit"]
2329 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2330 }
2331 )
2333 # Try a bunch of timespan queries, mixing up the bounds themselves,
2334 # where they appear in the expression, and how we get the timespan into
2335 # the expression.
2337 # t1 is before the start of i1, so this should not include i1.
2338 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2339 # t2 is exactly at the start of i2, but ends are exclusive, so these
2340 # should not include i2.
2341 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2342 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2343 # t3 is in the middle of i3, so this should include i3.
2344 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2345 # This one should not include i3, by the same reasoning.
2346 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2347 # t4 is exactly at the end of i4, so this should include i4.
2348 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2349 # i4's upper bound of t4 is exclusive, so this should not include i4.
2350 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2352 # Now some timespan vs. time scalar queries.
2353 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2354 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2355 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2356 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2357 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2358 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2360 # Empty timespans should not overlap anything.
2361 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
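# A sketch mirroring the query() helper above: timespan literals are
# (begin, end) pairs, inclusive of begin and exclusive of end, with null
# for an unbounded side; bounds may also arrive through bind rather than
# being spelled inline.
def _visits_overlapping_sketch(registry, t_begin, t_end):
    return sorted(
        {
            data_id["visit"]
            for data_id in registry.queryDataIds(
                "visit",
                instrument="HSC",
                where="visit.timespan OVERLAPS (t_begin, t_end)",
                bind={"t_begin": t_begin, "t_end": t_end},
            )
        }
    )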
2363 def testCollectionSummaries(self):
2364 """Test recording and retrieval of collection summaries."""
2365 self.maxDiff = None
2366 registry = self.makeRegistry()
2367 # Importing datasets from yaml should go through the code path where
2368 # we update collection summaries as we insert datasets.
2369 self.loadData(registry, "base.yaml")
2370 self.loadData(registry, "datasets.yaml")
2371 flat = registry.getDatasetType("flat")
2372 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
2373 expected1.datasetTypes.add(registry.getDatasetType("bias"))
2374 expected1.datasetTypes.add(flat)
2375 expected1.dimensions.update_extract(
2376 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
2377 )
2378 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2379 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2380 # Create a chained collection with both of the imported runs; the
2381 # summary should be the same, because it's a union with itself.
2382 chain = "chain"
2383 registry.registerCollection(chain, CollectionType.CHAINED)
2384 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2385 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2386 # Associate flats only into a tagged collection and a calibration
2387 # collection to check summaries of those.
2388 tag = "tag"
2389 registry.registerCollection(tag, CollectionType.TAGGED)
2390 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2391 calibs = "calibs"
2392 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2393 registry.certify(
2394 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2395 )
2396 expected2 = expected1.copy()
2397 expected2.datasetTypes.discard("bias")
2398 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2399 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2400 # Explicitly calling Registry.refresh() should load those same
2401 # summaries, via a totally different code path.
2402 registry.refresh()
2403 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2404 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2405 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2406 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
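# An illustrative sketch of what a summary buys us: a cheap,
# conservative record of the dataset types (and governor dimension
# values) a collection may contain, letting queries prune collections
# without touching the dataset tables.
def _may_contain_sketch(registry, collection, dataset_type_name) -> bool:
    summary = registry.getCollectionSummary(collection)
    return dataset_type_name in summary.datasetTypes.names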
2408 def testBindInQueryDatasets(self):
2409 """Test that the bind parameter is correctly forwarded in
2410 queryDatasets recursion.
2411 """
2412 registry = self.makeRegistry()
2413 # Load the standard base and datasets test data so there are flats in
2414 # more than one collection to query against.
2415 self.loadData(registry, "base.yaml")
2416 self.loadData(registry, "datasets.yaml")
2417 self.assertEqual(
2418 set(registry.queryDatasets("flat", band="r", collections=...)),
2419 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2420 )
2422 def testQueryResultSummaries(self):
2423 """Test summary methods like `count`, `any`, and `explain_no_results`
2424 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2425 """
2426 registry = self.makeRegistry()
2427 self.loadData(registry, "base.yaml")
2428 self.loadData(registry, "datasets.yaml")
2429 self.loadData(registry, "spatial.yaml")
2430 # Default test dataset has two collections, each with both flats and
2431 # biases. Add a new collection with only biases.
2432 registry.registerCollection("biases", CollectionType.TAGGED)
2433 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2434 # First query yields two results, and involves no postprocessing.
2435 query1 = registry.queryDataIds(["physical_filter"], band="r")
2436 self.assertTrue(query1.any(execute=False, exact=False))
2437 self.assertTrue(query1.any(execute=True, exact=False))
2438 self.assertTrue(query1.any(execute=True, exact=True))
2439 self.assertEqual(query1.count(exact=False), 2)
2440 self.assertEqual(query1.count(exact=True), 2)
2441 self.assertFalse(list(query1.explain_no_results()))
2442 # Second query should yield no results, but this isn't detectable
2443 # unless we actually run a query.
2444 query2 = registry.queryDataIds(["physical_filter"], band="h")
2445 self.assertTrue(query2.any(execute=False, exact=False))
2446 self.assertFalse(query2.any(execute=True, exact=False))
2447 self.assertFalse(query2.any(execute=True, exact=True))
2448 self.assertEqual(query2.count(exact=False), 0)
2449 self.assertEqual(query2.count(exact=True), 0)
2450 self.assertFalse(list(query2.explain_no_results()))
2451 # These queries yield no results due to various problems that can be
2452 # spotted prior to execution, yielding helpful diagnostics.
2453 base_query = registry.queryDataIds(["detector", "physical_filter"])
2454 for query, snippets in [
2455 (
2456 # Dataset type name doesn't match any existing dataset types.
2457 registry.queryDatasets("nonexistent", collections=...),
2458 ["nonexistent"],
2459 ),
2460 (
2461 # Dataset type name doesn't match any existing dataset types.
2462 base_query.findDatasets("nonexistent", collections=["biases"]),
2463 ["nonexistent"],
2464 ),
2465 (
2466 # Dataset type name doesn't match any existing dataset types.
2467 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2468 ["nonexistent"],
2469 ),
2470 (
2471 # Dataset type object isn't registered.
2472 registry.queryDatasets(
2473 DatasetType(
2474 "nonexistent",
2475 dimensions=["instrument"],
2476 universe=registry.dimensions,
2477 storageClass="Image",
2478 ),
2479 collections=...,
2480 ),
2481 ["nonexistent"],
2482 ),
2483 (
2484 # Dataset type object isn't registered.
2485 base_query.findDatasets(
2486 DatasetType(
2487 "nonexistent",
2488 dimensions=["instrument"],
2489 universe=registry.dimensions,
2490 storageClass="Image",
2491 ),
2492 collections=["biases"],
2493 ),
2494 ["nonexistent"],
2495 ),
2496 (
2497 # No datasets of this type in this collection.
2498 registry.queryDatasets("flat", collections=["biases"]),
2499 ["flat", "biases"],
2500 ),
2501 (
2502 # No datasets of this type in this collection.
2503 base_query.findDatasets("flat", collections=["biases"]),
2504 ["flat", "biases"],
2505 ),
2506 (
2507 # No collections matching at all.
2508 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2509 ["potato"],
2510 ),
2511 (
2512 # Dataset type name doesn't match any existing dataset types.
2513 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2514 ["nonexistent"],
2515 ),
2516 ]:
2518 self.assertFalse(query.any(execute=False, exact=False))
2519 self.assertFalse(query.any(execute=True, exact=False))
2520 self.assertFalse(query.any(execute=True, exact=True))
2521 self.assertEqual(query.count(exact=False), 0)
2522 self.assertEqual(query.count(exact=True), 0)
2523 messages = list(query.explain_no_results())
2524 self.assertTrue(messages)
2525 # Want all expected snippets to appear in at least one message.
2526 self.assertTrue(
2527 any(
2528 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2529 ),
2530 messages,
2531 )
2533 # These queries yield no results due to problems that can be identified
2534 # by cheap follow-up queries, yielding helpful diagnostics.
2535 for query, snippets in [
2536 (
2537 # No records for one of the involved dimensions.
2538 registry.queryDataIds(["subfilter"]),
2539 ["dimension records", "subfilter"],
2540 ),
2541 (
2542 # No records for one of the involved dimensions.
2543 registry.queryDimensionRecords("subfilter"),
2544 ["dimension records", "subfilter"],
2545 ),
2546 ]:
2547 self.assertFalse(query.any(execute=True, exact=False))
2548 self.assertFalse(query.any(execute=True, exact=True))
2549 self.assertEqual(query.count(exact=True), 0)
2550 messages = list(query.explain_no_results())
2551 self.assertTrue(messages)
2552            # All expected snippets should appear in at least one message.
2553            self.assertTrue(
2554                any(
2555                    all(snippet in message for snippet in snippets) for message in messages
2556                ),
2557                messages,
2558            )
2560 # This query yields four overlaps in the database, but one is filtered
2561 # out in postprocessing. The count queries aren't accurate because
2562 # they don't account for duplication that happens due to an internal
2563 # join against commonSkyPix.
2564 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2565 self.assertEqual(
2566 {
2567 DataCoordinate.standardize(
2568 instrument="Cam1",
2569 skymap="SkyMap1",
2570 visit=v,
2571 tract=t,
2572 universe=registry.dimensions,
2573 )
2574 for v, t in [(1, 0), (2, 0), (2, 1)]
2575 },
2576 set(query3),
2577 )
2578 self.assertTrue(query3.any(execute=False, exact=False))
2579 self.assertTrue(query3.any(execute=True, exact=False))
2580 self.assertTrue(query3.any(execute=True, exact=True))
2581 self.assertGreaterEqual(query3.count(exact=False), 4)
2582 self.assertGreaterEqual(query3.count(exact=True), 3)
2583 self.assertFalse(list(query3.explain_no_results()))
2584 # This query yields overlaps in the database, but all are filtered
2585 # out in postprocessing. The count queries again aren't very useful.
2586 # We have to use `where=` here to avoid an optimization that
2587 # (currently) skips the spatial postprocess-filtering because it
2588 # recognizes that no spatial join is necessary. That's not ideal, but
2589 # fixing it is out of scope for this ticket.
2590 query4 = registry.queryDataIds(
2591 ["visit", "tract"],
2592 instrument="Cam1",
2593 skymap="SkyMap1",
2594 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2595 )
2596 self.assertFalse(set(query4))
2597 self.assertTrue(query4.any(execute=False, exact=False))
2598 self.assertTrue(query4.any(execute=True, exact=False))
2599 self.assertFalse(query4.any(execute=True, exact=True))
2600 self.assertGreaterEqual(query4.count(exact=False), 1)
2601 self.assertEqual(query4.count(exact=True), 0)
2602 messages = list(query4.explain_no_results())
2603 self.assertTrue(messages)
2604 self.assertTrue(any("regions did not overlap" in message for message in messages))
2606        # There are also cases where queries return empty results but we do
2607        # not yet know how to explain why (could we just say miracles happen?)
2608 query5 = registry.queryDimensionRecords(
2609 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2610 )
2611 self.assertEqual(query5.count(exact=True), 0)
2612 messages = list(query5.explain_no_results())
2613 self.assertFalse(messages)
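    # Illustrative sketch, not one of the tests: the doomed-query diagnostic
    # pattern exercised above, assuming `registry` has been populated with the
    # same base.yaml/datasets.yaml test data; the dataset type name is
    # deliberately one that was never registered.
    def _sketchExplainNoResults(self, registry):
        query = registry.queryDatasets("nonexistent", collections=...)
        # exact=True forces a full check, including postprocess filtering;
        # execute=True permits actually running the query if needed.
        if not query.any(execute=True, exact=True):
            for message in query.explain_no_results():
                print(message)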
2615 def testQueryDataIdsOrderBy(self):
2616 """Test order_by and limit on result returned by queryDataIds()."""
2617 registry = self.makeRegistry()
2618 self.loadData(registry, "base.yaml")
2619 self.loadData(registry, "datasets.yaml")
2620 self.loadData(registry, "spatial.yaml")
2622 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2623 return registry.queryDataIds(
2624 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2625 )
2627 Test = namedtuple(
2628 "testQueryDataIdsOrderByTest",
2629 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2630 defaults=(None, None, None),
2631 )
2633 test_data = (
2634 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2635 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2636 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2637 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2638 Test(
2639 "tract.id,visit.id",
2640 "tract,visit",
2641 ((0, 1), (0, 1), (0, 2)),
2642 limit=(3,),
2643 ),
2644 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2645 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2646 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2647 Test(
2648 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2649 ),
2650 Test(
2651 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2652 ),
2653 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2654 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2655 Test(
2656 "tract,-timespan.begin,timespan.end",
2657 "tract,visit",
2658 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2659 ),
2660 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2661 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2662 Test(
2663 "tract,detector",
2664 "tract,detector",
2665 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2666 datasets="flat",
2667 collections="imported_r",
2668 ),
2669 Test(
2670 "tract,detector.full_name",
2671 "tract,detector",
2672 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2673 datasets="flat",
2674 collections="imported_r",
2675 ),
2676 Test(
2677 "tract,detector.raft,detector.name_in_raft",
2678 "tract,detector",
2679 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2680 datasets="flat",
2681 collections="imported_r",
2682 ),
2683 )
2685 for test in test_data:
2686 order_by = test.order_by.split(",")
2687 keys = test.keys.split(",")
2688 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2689 if test.limit is not None:
2690 query = query.limit(*test.limit)
2691 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2692 self.assertEqual(dataIds, test.result)
2694            # Repeat the same checks on a materialized query.
2695            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2696 if test.limit is not None:
2697 query = query.limit(*test.limit)
2698 with query.materialize() as materialized:
2699 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
2700 self.assertEqual(dataIds, test.result)
2702        # Errors in ORDER BY expressions raise ValueError.
2703 for order_by in ("", "-"):
2704 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2705 list(do_query().order_by(order_by))
2707 for order_by in ("undimension.name", "-undimension.name"):
2708 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2709 list(do_query().order_by(order_by))
2711 for order_by in ("attract", "-attract"):
2712 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2713 list(do_query().order_by(order_by))
2715 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2716 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2718        with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"):
2719 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2721 with self.assertRaisesRegex(
2722 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2723 ):
2724            list(do_query(("tract",)).order_by("timespan.begin"))
2726 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2727            list(do_query(("tract",)).order_by("tract.timespan.begin"))
2729 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2730            list(do_query(("tract",)).order_by("tract.name"))
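    # Illustrative sketch, not one of the tests: typical chaining of order_by
    # and limit as validated by the table above, assuming the spatial.yaml
    # test data.
    def _sketchOrderByLimit(self, registry):
        query = registry.queryDataIds(
            ["visit", "tract"], instrument="Cam1", skymap="SkyMap1"
        )
        # Descending tract, then ascending visit; keep three rows (limit also
        # accepts an optional offset as its second argument).
        for data_id in query.order_by("-tract", "visit").limit(3):
            print(data_id["tract"], data_id["visit"])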
2732 def testQueryDataIdsGovernorExceptions(self):
2733 """Test exceptions raised by queryDataIds() for incorrect governors."""
2734 registry = self.makeRegistry()
2735 self.loadData(registry, "base.yaml")
2736 self.loadData(registry, "datasets.yaml")
2737 self.loadData(registry, "spatial.yaml")
2739 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs):
2740 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2742 Test = namedtuple(
2743            "testQueryDataIdsGovernorExceptionsTest",
2744 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2745 defaults=(None, None, None, {}, None, 0),
2746 )
2748 test_data = (
2749 Test("tract,visit", count=6),
2750 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2751 Test(
2752 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2753 ),
2754 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2755 Test(
2756 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2757 ),
2758 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2759 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2760 Test(
2761 "tract,visit",
2762 where="instrument=cam AND skymap=map",
2763 bind={"cam": "Cam1", "map": "SkyMap1"},
2764 count=6,
2765 ),
2766 Test(
2767 "tract,visit",
2768 where="instrument=cam AND skymap=map",
2769 bind={"cam": "Cam", "map": "SkyMap"},
2770 exception=DataIdValueError,
2771 ),
2772 )
2774 for test in test_data:
2775 dimensions = test.dimensions.split(",")
2776 if test.exception:
2777 with self.assertRaises(test.exception):
2778 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2779 else:
2780 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2781 self.assertEqual(query.count(), test.count)
2783            # Repeat the same checks on a materialized query.
2784 if test.exception:
2785 with self.assertRaises(test.exception):
2786 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2787 with query.materialize() as materialized:
2788 materialized.count()
2789 else:
2790 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2791 with query.materialize() as materialized:
2792 self.assertEqual(materialized.count(), test.count)
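    # Illustrative sketch, not one of the tests: binding identifiers used in a
    # `where` expression, as validated above. An unknown governor value (e.g.
    # an instrument that was never registered) raises DataIdValueError.
    def _sketchBindParameters(self, registry):
        query = registry.queryDataIds(
            ["tract", "visit"],
            where="instrument = cam AND skymap = map",
            bind={"cam": "Cam1", "map": "SkyMap1"},
        )
        return query.count()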
2794 def testQueryDimensionRecordsOrderBy(self):
2795 """Test order_by and limit on result returned by
2796 queryDimensionRecords().
2797 """
2798 registry = self.makeRegistry()
2799 self.loadData(registry, "base.yaml")
2800 self.loadData(registry, "datasets.yaml")
2801 self.loadData(registry, "spatial.yaml")
2803 def do_query(element, datasets=None, collections=None):
2804 return registry.queryDimensionRecords(
2805 element, instrument="Cam1", datasets=datasets, collections=collections
2806 )
2808 query = do_query("detector")
2809 self.assertEqual(len(list(query)), 4)
2811 Test = namedtuple(
2812            "testQueryDimensionRecordsOrderByTest",
2813 ("element", "order_by", "result", "limit", "datasets", "collections"),
2814 defaults=(None, None, None),
2815 )
2817 test_data = (
2818 Test("detector", "detector", (1, 2, 3, 4)),
2819 Test("detector", "-detector", (4, 3, 2, 1)),
2820 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2821 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2822 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2823 Test("visit", "visit", (1, 2)),
2824 Test("visit", "-visit.id", (2, 1)),
2825 Test("visit", "zenith_angle", (1, 2)),
2826 Test("visit", "-visit.name", (2, 1)),
2827 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2828 )
2830 for test in test_data:
2831 order_by = test.order_by.split(",")
2832 query = do_query(test.element).order_by(*order_by)
2833 if test.limit is not None:
2834 query = query.limit(*test.limit)
2835 dataIds = tuple(rec.id for rec in query)
2836 self.assertEqual(dataIds, test.result)
2838        # Errors in ORDER BY expressions raise ValueError.
2839 for order_by in ("", "-"):
2840 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2841 list(do_query("detector").order_by(order_by))
2843 for order_by in ("undimension.name", "-undimension.name"):
2844 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
2845 list(do_query("detector").order_by(order_by))
2847 for order_by in ("attract", "-attract"):
2848 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
2849 list(do_query("detector").order_by(order_by))
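    # Illustrative sketch, not one of the tests: dimension records can be
    # ordered by metadata columns such as a detector's raft and name within
    # the raft, as the table above verifies against the Cam1 test data.
    def _sketchRecordOrderBy(self, registry):
        records = registry.queryDimensionRecords("detector", instrument="Cam1")
        for record in records.order_by("raft", "-name_in_raft").limit(2):
            print(record.id, record.full_name)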
2851 def testQueryDimensionRecordsExceptions(self):
2852 """Test exceptions raised by queryDimensionRecords()."""
2853 registry = self.makeRegistry()
2854 self.loadData(registry, "base.yaml")
2855 self.loadData(registry, "datasets.yaml")
2856 self.loadData(registry, "spatial.yaml")
2858 result = registry.queryDimensionRecords("detector")
2859 self.assertEqual(result.count(), 4)
2860 result = registry.queryDimensionRecords("detector", instrument="Cam1")
2861 self.assertEqual(result.count(), 4)
2862 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
2863 self.assertEqual(result.count(), 4)
2864 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
2865 self.assertEqual(result.count(), 4)
2866 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
2867 self.assertEqual(result.count(), 4)
2869 with self.assertRaisesRegex(
2870 DataIdValueError, "Could not fetch record for required dimension instrument"
2871 ):
2872 registry.queryDimensionRecords("detector", instrument="NotCam1")
2874 with self.assertRaisesRegex(
2875 DataIdValueError, "Could not fetch record for required dimension instrument"
2876 ):
2877 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
2879 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2880 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
2881 result.count()
2883 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2884 result = registry.queryDimensionRecords(
2885 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
2886 )
2887 result.count()
2889 def testDatasetConstrainedDimensionRecordQueries(self):
2890 """Test that queryDimensionRecords works even when given a dataset
2891 constraint whose dimensions extend beyond the requested dimension
2892 element's.
2893 """
2894 registry = self.makeRegistry()
2895 self.loadData(registry, "base.yaml")
2896 self.loadData(registry, "datasets.yaml")
2897        # Query for physical_filter dimension records, using a dataset whose
2898        # dimensions include physical_filter as well as detector.
2899 records = registry.queryDimensionRecords(
2900 "physical_filter",
2901 datasets=["flat"],
2902 collections="imported_r",
2903 )
2904 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
2906 def testSkyPixDatasetQueries(self):
2907 """Test that we can build queries involving skypix dimensions as long
2908 as a dataset type that uses those dimensions is included.
2909 """
2910 registry = self.makeRegistry()
2911 self.loadData(registry, "base.yaml")
2912 dataset_type = DatasetType(
2913 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
2914 )
2915 registry.registerDatasetType(dataset_type)
2916 run = "r"
2917 registry.registerRun(run)
2918 # First try queries where there are no datasets; the concern is whether
2919 # we can even build and execute these queries without raising, even
2920 # when "doomed" query shortcuts are in play.
2921 self.assertFalse(
2922 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
2923 )
2924 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
2925 # Now add a dataset and see that we can get it back.
2926 htm7 = registry.dimensions.skypix["htm"][7].pixelization
2927 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
2928 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
2929 self.assertEqual(
2930 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
2931 {data_id},
2932 )
2933 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
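    # Illustrative sketch, not one of the tests: constructing a valid skypix
    # data ID as done above. The pixelization's universe() is a RangeSet of
    # valid pixel indices; [0][0] takes the first index of the first range.
    def _sketchSkyPixDataId(self, registry):
        pixelization = registry.dimensions.skypix["htm"][7].pixelization
        first_index = pixelization.universe()[0][0]
        return registry.expandDataId(instrument="Cam1", htm7=first_index)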