# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None
import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
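
    # A minimal sketch of what a concrete subclass might look like, assuming
    # an in-memory SQLite backend and the `Registry.createFromConfig` factory;
    # the class and paths here are hypothetical, for illustration only:
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # cannot share an in-memory database
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)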

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
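
    # Typical use, sketched here for orientation (the exact query call is
    # illustrative, not a fixed API of this helper):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected=[...],  # list of DataCoordinate
    #     )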

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test very long IN clause which exceeds sqlite limit on number of
        # parameters. SQLite says the limit is 32k but it looks like it is
        # much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than 1k batch size, first with
        # duplicates, second has matching elements in different batches (after
        # sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing same DatasetRef with different dataset ID is an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
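        # (The two modes differ in what seeds the deterministic version-5
        # UUID: DATAID_TYPE hashes the dataset type and data ID only, so the
        # same dataset cannot be imported into a second run, while
        # DATAID_TYPE_RUN also folds in the run name; the assertions below
        # exercise exactly this difference.)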
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting same dataId with different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler running
        # in entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; chain2 searches run2 directly,
        # so the same dataset should be found through the chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the dimensions, but it
        # is a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, these_regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in these_regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )
1259 def testAttributeManager(self):
1260 """Test basic functionality of attribute manager."""
1261 # Number of attributes with schema versions in a fresh database:
1262 # 6 managers with 3 records per manager, plus the dimensions config.
1263 VERSION_COUNT = 6 * 3 + 1
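# (That is, VERSION_COUNT == 19. The three records per manager are
# presumably its configured class name plus schema-version metadata --
# an assumption here; the test only depends on the total.)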
1265 registry = self.makeRegistry()
1266 attributes = registry._managers.attributes
1268 # check what get() returns for non-existing key
1269 self.assertIsNone(attributes.get("attr"))
1270 self.assertEqual(attributes.get("attr", ""), "")
1271 self.assertEqual(attributes.get("attr", "Value"), "Value")
1272 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1274 # cannot store empty key or value
1275 with self.assertRaises(ValueError):
1276 attributes.set("", "value")
1277 with self.assertRaises(ValueError):
1278 attributes.set("attr", "")
1280 # set value of non-existing key
1281 attributes.set("attr", "value")
1282 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1283 self.assertEqual(attributes.get("attr"), "value")
1285 # update value of existing key
1286 with self.assertRaises(ButlerAttributeExistsError):
1287 attributes.set("attr", "value2")
1289 attributes.set("attr", "value2", force=True)
1290 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1291 self.assertEqual(attributes.get("attr"), "value2")
1293 # delete existing key
1294 self.assertTrue(attributes.delete("attr"))
1295 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1297 # delete non-existing key
1298 self.assertFalse(attributes.delete("non-attr"))
1300 # store a bunch of keys and get the list back
1301 data = [
1302 ("version.core", "1.2.3"),
1303 ("version.dimensions", "3.2.1"),
1304 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1305 ]
1306 for key, value in data:
1307 attributes.set(key, value)
1308 items = dict(attributes.items())
1309 for key, value in data:
1310 self.assertEqual(items[key], value)
1312 def testQueryDatasetsDeduplication(self):
1313 """Test that the findFirst option to queryDatasets selects datasets
1314 from collections in the order given".
1315 """
1316 registry = self.makeRegistry()
1317 self.loadData(registry, "base.yaml")
1318 self.loadData(registry, "datasets.yaml")
1319 self.assertCountEqual(
1320 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1321 [
1322 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1323 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1324 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1325 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1326 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1327 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1328 ],
1329 )
1330 self.assertCountEqual(
1331 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1332 [
1333 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1334 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1335 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1336 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1337 ],
1338 )
1339 self.assertCountEqual(
1340 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1341 [
1342 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1343 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1344 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1345 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1346 ],
1347 )
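# A pure-Python sketch of the findFirst semantics asserted above; the
# helper below is hypothetical, not part of the Registry API. Walking
# collections in the order given and keeping the first dataset seen per
# data ID yields exactly the expected lists:
#
#     def find_first(refs_in_collection_order):
#         first = {}
#         for ref in refs_in_collection_order:
#             first.setdefault(ref.dataId, ref)
#         return list(first.values())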
1349 def testQueryResults(self):
1350 """Test querying for data IDs and then manipulating the QueryResults
1351 object returned to perform other queries.
1352 """
1353 registry = self.makeRegistry()
1354 self.loadData(registry, "base.yaml")
1355 self.loadData(registry, "datasets.yaml")
1356 bias = registry.getDatasetType("bias")
1357 flat = registry.getDatasetType("flat")
1358 # Obtain expected results from methods other than those we're testing
1359 # here. That includes:
1360 # - the dimensions of the data IDs we want to query:
1361 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1362 # - the dimensions of some other data IDs we'll extract from that:
1363 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1364 # - the data IDs we expect to obtain from the first queries:
1365 expectedDataIds = DataCoordinateSet(
1366 {
1367 DataCoordinate.standardize(
1368 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1369 )
1370 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1371 },
1372 graph=expectedGraph,
1373 hasFull=False,
1374 hasRecords=False,
1375 )
1376 # - the flat datasets we expect to find from those data IDs, in just
1377 # one collection (so deduplication is irrelevant):
1378 expectedFlats = [
1379 registry.findDataset(
1380 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1381 ),
1382 registry.findDataset(
1383 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1384 ),
1385 registry.findDataset(
1386 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1387 ),
1388 ]
1389 # - the data IDs we expect to extract from that:
1390 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1391 # - the bias datasets we expect to find from those data IDs, after we
1392 # subset out the physical_filter dimension, first with duplicates:
1393 expectedAllBiases = [
1394 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1395 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1396 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1397 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1398 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1399 ]
1400 # - ...and without duplicates:
1401 expectedDeduplicatedBiases = [
1402 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1403 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1404 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1405 ]
1406 # Test against those expected results, using a "lazy" query for the
1407 # data IDs (which re-executes that query each time we use it to do
1408 # something new).
1409 dataIds = registry.queryDataIds(
1410 ["detector", "physical_filter"],
1411 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1412 instrument="Cam1",
1413 )
1414 self.assertEqual(dataIds.graph, expectedGraph)
1415 self.assertEqual(dataIds.toSet(), expectedDataIds)
1416 self.assertCountEqual(
1417 list(
1418 dataIds.findDatasets(
1419 flat,
1420 collections=["imported_r"],
1421 )
1422 ),
1423 expectedFlats,
1424 )
1425 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1426 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1427 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1428 self.assertCountEqual(
1429 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1430 expectedAllBiases,
1431 )
1432 self.assertCountEqual(
1433 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1434 expectedDeduplicatedBiases,
1435 )
1437 # A dataset type whose dimensions don't match the data IDs should raise.
1438 with self.assertRaises(ValueError):
1439 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True)
1441 # Use a component dataset type.
1442 self.assertCountEqual(
1443 list(
1444 subsetDataIds.findDatasets(
1445 bias.makeComponentDatasetType("image"),
1446 collections=["imported_r", "imported_g"],
1447 findFirst=False,
1448 )
1449 ),
1450 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1451 )
1453 # Use a named dataset type that does not exist and a dataset type
1454 # object that does not exist.
1455 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1456 unknown_component_type = unknown_type.makeComponentDatasetType("image")
1458 # Four combinations of unknown dataset type need to be tested:
1459 # composite vs. component, and string name vs. dataset type object.
1460 test_type: Union[str, DatasetType]
1461 for test_type, test_type_name in (
1462 (unknown_type, unknown_type.name),
1463 (unknown_type.name, unknown_type.name),
1464 (unknown_component_type, unknown_type.name),
1465 (unknown_component_type.name, unknown_component_type.name),
1466 ):
1467 result = subsetDataIds.findDatasets(
1468 test_type, collections=["imported_r", "imported_g"], findFirst=True
1469 )
1470 self.assertEqual(result.count(), 0)
1471 self.assertIn(
1472 f"Dataset type '{test_type_name}' is not registered", "\n".join(result.explain_no_results())
1473 )
1475 # Materialize the bias dataset queries (only) by putting the results
1476 # into temporary tables, then repeat those tests.
1477 with subsetDataIds.findDatasets(
1478 bias, collections=["imported_r", "imported_g"], findFirst=False
1479 ).materialize() as biases:
1480 self.assertCountEqual(list(biases), expectedAllBiases)
1481 with subsetDataIds.findDatasets(
1482 bias, collections=["imported_r", "imported_g"], findFirst=True
1483 ).materialize() as biases:
1484 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1485 # Materialize the data ID subset query, but not the dataset queries.
1486 with subsetDataIds.materialize() as subsetDataIds:
1487 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1488 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1489 self.assertCountEqual(
1490 list(
1491 subsetDataIds.findDatasets(
1492 bias, collections=["imported_r", "imported_g"], findFirst=False
1493 )
1494 ),
1495 expectedAllBiases,
1496 )
1497 self.assertCountEqual(
1498 list(
1499 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1500 ),
1501 expectedDeduplicatedBiases,
1502 )
1503 # Materialize the dataset queries, too.
1504 with subsetDataIds.findDatasets(
1505 bias, collections=["imported_r", "imported_g"], findFirst=False
1506 ).materialize() as biases:
1507 self.assertCountEqual(list(biases), expectedAllBiases)
1508 with subsetDataIds.findDatasets(
1509 bias, collections=["imported_r", "imported_g"], findFirst=True
1510 ).materialize() as biases:
1511 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1512 # Materialize the original query, but none of the follow-up queries.
1513 with dataIds.materialize() as dataIds:
1514 self.assertEqual(dataIds.graph, expectedGraph)
1515 self.assertEqual(dataIds.toSet(), expectedDataIds)
1516 self.assertCountEqual(
1517 list(
1518 dataIds.findDatasets(
1519 flat,
1520 collections=["imported_r"],
1521 )
1522 ),
1523 expectedFlats,
1524 )
1525 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1526 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1527 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1528 self.assertCountEqual(
1529 list(
1530 subsetDataIds.findDatasets(
1531 bias, collections=["imported_r", "imported_g"], findFirst=False
1532 )
1533 ),
1534 expectedAllBiases,
1535 )
1536 self.assertCountEqual(
1537 list(
1538 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1539 ),
1540 expectedDeduplicatedBiases,
1541 )
1542 # Materialize just the bias dataset queries.
1543 with subsetDataIds.findDatasets(
1544 bias, collections=["imported_r", "imported_g"], findFirst=False
1545 ).materialize() as biases:
1546 self.assertCountEqual(list(biases), expectedAllBiases)
1547 with subsetDataIds.findDatasets(
1548 bias, collections=["imported_r", "imported_g"], findFirst=True
1549 ).materialize() as biases:
1550 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1551 # Materialize the subset data ID query, but not the dataset
1552 # queries.
1553 with subsetDataIds.materialize() as subsetDataIds:
1554 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1555 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1556 self.assertCountEqual(
1557 list(
1558 subsetDataIds.findDatasets(
1559 bias, collections=["imported_r", "imported_g"], findFirst=False
1560 )
1561 ),
1562 expectedAllBiases,
1563 )
1564 self.assertCountEqual(
1565 list(
1566 subsetDataIds.findDatasets(
1567 bias, collections=["imported_r", "imported_g"], findFirst=True
1568 )
1569 ),
1570 expectedDeduplicatedBiases,
1571 )
1572 # Materialize the bias dataset queries, too, so now we're
1573 # materializing every single step.
1574 with subsetDataIds.findDatasets(
1575 bias, collections=["imported_r", "imported_g"], findFirst=False
1576 ).materialize() as biases:
1577 self.assertCountEqual(list(biases), expectedAllBiases)
1578 with subsetDataIds.findDatasets(
1579 bias, collections=["imported_r", "imported_g"], findFirst=True
1580 ).materialize() as biases:
1581 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
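# materialize() snapshots the current result rows into a temporary table
# and returns a context manager whose results are backed by that table,
# so follow-up subset()/findDatasets() calls join against the snapshot
# instead of re-executing the original query. A minimal usage sketch:
#
#     with registry.queryDataIds(["detector"]).materialize() as ids:
#         refs = list(ids.findDatasets("bias", collections=..., findFirst=True))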
1583 def testEmptyDimensionsQueries(self):
1584 """Test Query and QueryResults objects in the case where there are no
1585 dimensions.
1586 """
1587 # Set up test data: one dataset type, two runs, one dataset in each.
1588 registry = self.makeRegistry()
1589 self.loadData(registry, "base.yaml")
1590 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1591 registry.registerDatasetType(schema)
1592 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1593 run1 = "run1"
1594 run2 = "run2"
1595 registry.registerRun(run1)
1596 registry.registerRun(run2)
1597 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1598 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1599 # Query directly for both of the datasets, and each one, one at a time.
1600 self.checkQueryResults(
1601 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1602 )
1603 self.checkQueryResults(
1604 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1605 [dataset1],
1606 )
1607 self.checkQueryResults(
1608 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1609 [dataset2],
1610 )
1611 # Query for data IDs with no dimensions.
1612 dataIds = registry.queryDataIds([])
1613 self.checkQueryResults(dataIds, [dataId])
1614 # Use queried data IDs to find the datasets.
1615 self.checkQueryResults(
1616 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1617 [dataset1, dataset2],
1618 )
1619 self.checkQueryResults(
1620 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1621 [dataset1],
1622 )
1623 self.checkQueryResults(
1624 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1625 [dataset2],
1626 )
1627 # Now materialize the data ID query results and repeat those tests.
1628 with dataIds.materialize() as dataIds:
1629 self.checkQueryResults(dataIds, [dataId])
1630 self.checkQueryResults(
1631 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1632 [dataset1],
1633 )
1634 self.checkQueryResults(
1635 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1636 [dataset2],
1637 )
1638 # Query for non-empty data IDs, then subset that to get the empty one.
1639 # Repeat the above tests starting from that.
1640 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1641 self.checkQueryResults(dataIds, [dataId])
1642 self.checkQueryResults(
1643 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1644 [dataset1, dataset2],
1645 )
1646 self.checkQueryResults(
1647 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1648 [dataset1],
1649 )
1650 self.checkQueryResults(
1651 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1652 [dataset2],
1653 )
1654 with dataIds.materialize() as dataIds:
1655 self.checkQueryResults(dataIds, [dataId])
1656 self.checkQueryResults(
1657 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1658 [dataset1, dataset2],
1659 )
1660 self.checkQueryResults(
1661 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1662 [dataset1],
1663 )
1664 self.checkQueryResults(
1665 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1666 [dataset2],
1667 )
1668 # Query for non-empty data IDs, then materialize, then subset to get
1669 # the empty one. Repeat again.
1670 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1671 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1672 self.checkQueryResults(dataIds, [dataId])
1673 self.checkQueryResults(
1674 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1675 [dataset1, dataset2],
1676 )
1677 self.checkQueryResults(
1678 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1679 [dataset1],
1680 )
1681 self.checkQueryResults(
1682 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1683 [dataset2],
1684 )
1685 with dataIds.materialize() as dataIds:
1686 self.checkQueryResults(dataIds, [dataId])
1687 self.checkQueryResults(
1688 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1689 [dataset1, dataset2],
1690 )
1691 self.checkQueryResults(
1692 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1693 [dataset1],
1694 )
1695 self.checkQueryResults(
1696 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1697 [dataset2],
1698 )
1699 # Query for non-empty data IDs with a constraint on an empty-data-ID
1700 # dataset that exists.
1701 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1702 self.checkQueryResults(
1703 dataIds.subset(unique=True),
1704 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1705 )
1706 # Again query for non-empty data IDs with a constraint on empty-data-ID
1707 # datasets, but where the datasets don't exist. We delete the existing
1708 # dataset and query just that collection, rather than creating a new
1709 # empty collection, because deletion is less likely to be short-circuited
1710 # by our query-building logic (via the collection summaries), and such a
1711 # shortcut would make this test a bit more trivial than we'd like.
1712 registry.removeDatasets([dataset2])
1713 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1714 self.checkQueryResults(dataIds, [])
1716 def testDimensionDataModifications(self):
1717 """Test that modifying dimension records via:
1718 syncDimensionData(..., update=True) and
1719 insertDimensionData(..., replace=True) works as expected, even in the
1720 presence of datasets using those dimensions and spatial overlap
1721 relationships.
1722 """
1724 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1725 """Unpack a sphgeom.RangeSet into the integers it contains."""
1726 for begin, end in ranges:
1727 yield from range(begin, end)
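# For example, with a RangeSet holding the half-open range [5, 9):
#     list(unpack_range_set(lsst.sphgeom.RangeSet(5, 9))) == [5, 6, 7, 8]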
1729 def range_set_hull(
1730 ranges: lsst.sphgeom.RangeSet,
1731 pixelization: lsst.sphgeom.HtmPixelization,
1732 ) -> lsst.sphgeom.ConvexPolygon:
1733 """Create a ConvexPolygon hull of the region defined by a set of
1734 HTM pixelization index ranges.
1735 """
1736 points = []
1737 for index in unpack_range_set(ranges):
1738 points.extend(pixelization.triangle(index).getVertices())
1739 return lsst.sphgeom.ConvexPolygon(points)
1741 # Use HTM to set up an initial parent region (one arbitrary trixel)
1742 # and four child regions (the trixels within the parent at the next
1743 # level). We'll use the parent as a tract/visit region and the children
1744 # as its patch/visit_detector regions.
1745 registry = self.makeRegistry()
1746 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1747 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1748 index = 12288
1749 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1750 assert htm6.universe().contains(child_ranges_small)
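# (RangeSet(index).scaled(4) is the half-open range
# [4 * index, 4 * index + 4): in HTM indexing, the four children of
# trixel `index` at the next subdivision level.)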
1751 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1752 parent_region_small = lsst.sphgeom.ConvexPolygon(
1753 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1754 )
1755 assert all(parent_region_small.contains(c) for c in child_regions_small)
1756 # Make a larger version of each child region, defined to be the set of
1757 # htm6 trixels that overlap the original's bounding circle. Make a new
1758 # parent that's the convex hull of the new children.
1759 child_regions_large = [
1760 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1761 ]
1762 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1763 parent_region_large = lsst.sphgeom.ConvexPolygon(
1764 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1765 )
1766 assert all(parent_region_large.contains(c) for c in child_regions_large)
1767 assert parent_region_large.contains(parent_region_small)
1768 assert not parent_region_small.contains(parent_region_large)
1769 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1770 # Find some commonSkyPix indices that overlap the large regions but do
1771 # not overlap the small regions. We use commonSkyPix here to make sure the
1772 # real tests later involve what's in the database, not just post-query
1773 # region filtering.
1774 child_difference_indices = []
1775 for large, small in zip(child_regions_large, child_regions_small):
1776 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1777 assert difference, "if this is empty, we can't test anything useful with these regions"
1778 assert all(
1779 not commonSkyPix.triangle(d).isDisjointFrom(large)
1780 and commonSkyPix.triangle(d).isDisjointFrom(small)
1781 for d in difference
1782 )
1783 child_difference_indices.append(difference)
1784 parent_difference_indices = list(
1785 unpack_range_set(
1786 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1787 )
1788 )
1789 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1790 assert all(
1791 (
1792 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1793 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1794 )
1795 for d in parent_difference_indices
1796 )
1797 # Now that we've finally got those regions, we'll insert the large ones
1798 # as tract/patch dimension records.
1799 skymap_name = "testing_v1"
1800 registry.insertDimensionData(
1801 "skymap",
1802 {
1803 "name": skymap_name,
1804 "hash": bytes([42]),
1805 "tract_max": 1,
1806 "patch_nx_max": 2,
1807 "patch_ny_max": 2,
1808 },
1809 )
1810 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1811 registry.insertDimensionData(
1812 "patch",
1813 *[
1814 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1815 for n, c in enumerate(child_regions_large)
1816 ],
1817 )
1818 # Add a dataset that uses these dimensions to make sure that modifying
1819 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1820 # implement insert with replace=True as delete-then-insert).
1821 dataset_type = DatasetType(
1822 "coadd",
1823 dimensions=["tract", "patch"],
1824 universe=registry.dimensions,
1825 storageClass="Exposure",
1826 )
1827 registry.registerDatasetType(dataset_type)
1828 registry.registerCollection("the_run", CollectionType.RUN)
1829 registry.insertDatasets(
1830 dataset_type,
1831 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1832 run="the_run",
1833 )
1834 # Query for tracts and patches that overlap some "difference"
1835 # commonSkyPix pixels; there should be overlaps, because the database has
1836 # the "large" suite of regions.
1837 self.assertEqual(
1838 {0},
1839 {
1840 data_id["tract"]
1841 for data_id in registry.queryDataIds(
1842 ["tract"],
1843 skymap=skymap_name,
1844 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1845 )
1846 },
1847 )
1848 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1849 self.assertIn(
1850 patch_id,
1851 {
1852 data_id["patch"]
1853 for data_id in registry.queryDataIds(
1854 ["patch"],
1855 skymap=skymap_name,
1856 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1857 )
1858 },
1859 )
1860 # Use sync to update the tract region and insert to update the patch
1861 # regions, to the "small" suite.
1862 updated = registry.syncDimensionData(
1863 "tract",
1864 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1865 update=True,
1866 )
1867 self.assertEqual(updated, {"region": parent_region_large})
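# (syncDimensionData(..., update=True) returns the updated fields mapped
# to their *old* values -- here, the large region we just replaced.)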
1868 registry.insertDimensionData(
1869 "patch",
1870 *[
1871 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1872 for n, c in enumerate(child_regions_small)
1873 ],
1874 replace=True,
1875 )
1876 # Query again; there now should be no such overlaps, because the
1877 # database has the "small" suite of regions.
1878 self.assertFalse(
1879 set(
1880 registry.queryDataIds(
1881 ["tract"],
1882 skymap=skymap_name,
1883 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1884 )
1885 )
1886 )
1887 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1888 self.assertNotIn(
1889 patch_id,
1890 {
1891 data_id["patch"]
1892 for data_id in registry.queryDataIds(
1893 ["patch"],
1894 skymap=skymap_name,
1895 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1896 )
1897 },
1898 )
1899 # Update back to the large regions and query one more time.
1900 updated = registry.syncDimensionData(
1901 "tract",
1902 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1903 update=True,
1904 )
1905 self.assertEqual(updated, {"region": parent_region_small})
1906 registry.insertDimensionData(
1907 "patch",
1908 *[
1909 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1910 for n, c in enumerate(child_regions_large)
1911 ],
1912 replace=True,
1913 )
1914 self.assertEqual(
1915 {0},
1916 {
1917 data_id["tract"]
1918 for data_id in registry.queryDataIds(
1919 ["tract"],
1920 skymap=skymap_name,
1921 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1922 )
1923 },
1924 )
1925 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1926 self.assertIn(
1927 patch_id,
1928 {
1929 data_id["patch"]
1930 for data_id in registry.queryDataIds(
1931 ["patch"],
1932 skymap=skymap_name,
1933 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1934 )
1935 },
1936 )
1938 def testCalibrationCollections(self):
1939 """Test operations on `~CollectionType.CALIBRATION` collections,
1940 including `Registry.certify`, `Registry.decertify`, and
1941 `Registry.findDataset`.
1942 """
1943 # Setup - make a Registry, fill it with some datasets in
1944 # non-calibration collections.
1945 registry = self.makeRegistry()
1946 self.loadData(registry, "base.yaml")
1947 self.loadData(registry, "datasets.yaml")
1948 # Set up some timestamps.
1949 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
1950 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
1951 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
1952 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
1953 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
1954 allTimespans = [
1955 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1956 ]
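# (Because None appears at both ends of the input list, the combinations
# include timespans unbounded on either side as well as the fully
# unbounded Timespan(None, None).)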
1957 # Get references to some datasets.
1958 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1959 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1960 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1961 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1962 # Register the main calibration collection we'll be working with.
1963 collection = "Cam1/calibs/default"
1964 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1965 # Cannot associate into a calibration collection (no timespan).
1966 with self.assertRaises(CollectionTypeError):
1967 registry.associate(collection, [bias2a])
1968 # Certify 2a dataset with [t2, t4) validity.
1969 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1970 # Test that we can query for this dataset via the new collection, both
1971 # on its own and with a RUN collection, as long as we don't try to join
1972 # in temporal dimensions or use findFirst=True.
1973 self.assertEqual(
1974 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
1975 {bias2a},
1976 )
1977 self.assertEqual(
1978 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
1979 {
1980 bias2a,
1981 bias2b,
1982 bias3b,
1983 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1984 },
1985 )
1986 self.assertEqual(
1987 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
1988 {registry.expandDataId(instrument="Cam1", detector=2)},
1989 )
1990 self.assertEqual(
1991 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
1992 {
1993 registry.expandDataId(instrument="Cam1", detector=2),
1994 registry.expandDataId(instrument="Cam1", detector=3),
1995 registry.expandDataId(instrument="Cam1", detector=4),
1996 },
1997 )
1999 # We should not be able to certify 2b with anything overlapping that
2000 # window.
2001 with self.assertRaises(ConflictingDefinitionError):
2002 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2003 with self.assertRaises(ConflictingDefinitionError):
2004 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2005 with self.assertRaises(ConflictingDefinitionError):
2006 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2007 with self.assertRaises(ConflictingDefinitionError):
2008 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2009 with self.assertRaises(ConflictingDefinitionError):
2010 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2011 with self.assertRaises(ConflictingDefinitionError):
2012 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2013 with self.assertRaises(ConflictingDefinitionError):
2014 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2015 with self.assertRaises(ConflictingDefinitionError):
2016 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2017 # We should be able to certify 3a with a range overlapping that window,
2018 # because it's for a different detector.
2019 # We'll certify 3a over [t1, t3).
2020 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2021 # Now we'll certify 2b and 3b together over [t4, ∞).
2022 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2024 # Fetch all associations and check that they are what we expect.
2025 self.assertCountEqual(
2026 list(
2027 registry.queryDatasetAssociations(
2028 "bias",
2029 collections=[collection, "imported_g", "imported_r"],
2030 )
2031 ),
2032 [
2033 DatasetAssociation(
2034 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2035 collection="imported_g",
2036 timespan=None,
2037 ),
2038 DatasetAssociation(
2039 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2040 collection="imported_r",
2041 timespan=None,
2042 ),
2043 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2044 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2045 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2046 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2047 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2048 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2049 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2050 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2051 ],
2052 )
2054 class Ambiguous:
2055 """Tag class to denote lookups that should be ambiguous."""
2057 pass
2059 def assertLookup(
2060 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2061 ) -> None:
2062 """Local function that asserts that a bias lookup returns the given
2063 expected result.
2064 """
2065 if expected is Ambiguous:
2066 with self.assertRaises(RuntimeError):
2067 registry.findDataset(
2068 "bias",
2069 collections=collection,
2070 instrument="Cam1",
2071 detector=detector,
2072 timespan=timespan,
2073 )
2074 else:
2075 self.assertEqual(
2076 expected,
2077 registry.findDataset(
2078 "bias",
2079 collections=collection,
2080 instrument="Cam1",
2081 detector=detector,
2082 timespan=timespan,
2083 ),
2084 )
2086 # Systematically test lookups against expected results.
2087 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2088 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2089 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2090 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2091 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2092 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2093 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2094 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2095 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2096 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2097 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2098 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2099 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2100 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2101 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2102 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2103 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2104 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2105 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2106 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2107 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2108 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2109 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2110 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2111 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2112 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2113 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2114 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2115 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2116 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2117 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2118 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2119 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2120 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2121 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2122 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2123 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2124 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2125 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2126 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2127 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2128 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2130 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2131 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2132 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2133 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2134 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2135 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2136 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2137 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2138 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2139 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2140 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2141 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2142 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2143 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2144 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2145 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2146 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2147 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2148 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2149 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2150 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2151 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2152 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2153 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2154 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2155 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2156 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2157 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2158 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2159 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2160 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2161 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2162 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2163 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2164 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2165 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2166 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2167 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2168 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2169 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2170 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2171 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2172 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2173 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2174 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2175 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2177 # Decertify everything, this time with explicit data IDs, then check
2178 # that no lookups succeed.
2179 registry.decertify(
2180 collection,
2181 "bias",
2182 Timespan(None, None),
2183 dataIds=[
2184 dict(instrument="Cam1", detector=2),
2185 dict(instrument="Cam1", detector=3),
2186 ],
2187 )
2188 for detector in (2, 3):
2189 for timespan in allTimespans:
2190 assertLookup(detector=detector, timespan=timespan, expected=None)
2191 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2192 # those.
2193 registry.certify(
2194 collection,
2195 [bias2a, bias3a],
2196 Timespan(None, None),
2197 )
2198 for timespan in allTimespans:
2199 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2200 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2201 # Decertify just bias2a over [t2, t4).
2202 # This should split a single certification row into two (and leave the
2203 # other existing row, for bias3a, alone).
2204 registry.decertify(
2205 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2206 )
2207 for timespan in allTimespans:
2208 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2209 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2210 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2211 if overlapsBefore and overlapsAfter:
2212 expected = Ambiguous
2213 elif overlapsBefore or overlapsAfter:
2214 expected = bias2a
2215 else:
2216 expected = None
2217 assertLookup(detector=2, timespan=timespan, expected=expected)
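# In interval terms: certifying bias2a over (-inf, inf) and then
# decertifying [t2, t4) leaves two validity rows, (-inf, t2) and
# [t4, inf). A lookup timespan overlapping exactly one of them resolves
# to bias2a, one overlapping both is ambiguous, and one overlapping
# neither finds nothing -- which is what the loop above encodes.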
2219 def testSkipCalibs(self):
2220 """Test how queries handle skipping of calibration collections."""
2221 registry = self.makeRegistry()
2222 self.loadData(registry, "base.yaml")
2223 self.loadData(registry, "datasets.yaml")
2225 coll_calib = "Cam1/calibs/default"
2226 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2228 # Add all biases to the calibration collection.
2229 # Without this, the logic that prunes dataset subqueries based on
2230 # datasetType-collection summary information will fire before the logic
2231 # we want to test below. This is a good thing (it avoids the dreaded
2232 # NotImplementedError a bit more often) everywhere but here.
2233 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2235 coll_list = [coll_calib, "imported_g", "imported_r"]
2236 chain = "Cam1/chain"
2237 registry.registerCollection(chain, type=CollectionType.CHAINED)
2238 registry.setCollectionChain(chain, coll_list)
2240 # explicit list will raise if findFirst=True or there are temporal
2241 # dimensions
2242 with self.assertRaises(NotImplementedError):
2243 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2244 with self.assertRaises(NotImplementedError):
2245 registry.queryDataIds(
2246 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2247 ).count()
2249 # chain will skip
2250 datasets = list(registry.queryDatasets("bias", collections=chain))
2251 self.assertGreater(len(datasets), 0)
2253 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2254 self.assertGreater(len(dataIds), 0)
2256 # glob will skip too
2257 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2258 self.assertGreater(len(datasets), 0)
2260 # regular expression will skip too
2261 pattern = re.compile(".*")
2262 datasets = list(registry.queryDatasets("bias", collections=pattern))
2263 self.assertGreater(len(datasets), 0)
2265 # ellipsis should work as usual
2266 datasets = list(registry.queryDatasets("bias", collections=...))
2267 self.assertGreater(len(datasets), 0)
2269 # a few tests with findFirst
2270 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2271 self.assertGreater(len(datasets), 0)
2273 def testIngestTimeQuery(self):
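"""Test queries involving the dataset ingest_date field."""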
2275 registry = self.makeRegistry()
2276 self.loadData(registry, "base.yaml")
2277 dt0 = datetime.utcnow()
2278 self.loadData(registry, "datasets.yaml")
2279 dt1 = datetime.utcnow()
2281 datasets = list(registry.queryDatasets(..., collections=...))
2282 len0 = len(datasets)
2283 self.assertGreater(len0, 0)
2285 where = "ingest_date > T'2000-01-01'"
2286 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2287 len1 = len(datasets)
2288 self.assertEqual(len0, len1)
2290 # no one will ever use this piece of software in 30 years
2291 where = "ingest_date > T'2050-01-01'"
2292 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2293 len2 = len(datasets)
2294 self.assertEqual(len2, 0)
2296 # Check more exact timing to make sure there is no 37-second offset
2297 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2298 # sure that we don't test with higher precision.
2299 tests = [
2300 # format: (timestamp, operator, expected_len)
2301 (dt0 - timedelta(seconds=1), ">", len0),
2302 (dt0 - timedelta(seconds=1), "<", 0),
2303 (dt1 + timedelta(seconds=1), "<", len0),
2304 (dt1 + timedelta(seconds=1), ">", 0),
2305 ]
2306 for dt, op, expect_len in tests:
2307 dt_str = dt.isoformat(sep=" ")
2309 where = f"ingest_date {op} T'{dt_str}'"
2310 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2311 self.assertEqual(len(datasets), expect_len)
2313 # same with bind using datetime or astropy Time
2314 where = f"ingest_date {op} ingest_time"
2315 datasets = list(
2316 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2317 )
2318 self.assertEqual(len(datasets), expect_len)
2320 dt_astropy = astropy.time.Time(dt, format="datetime")
2321 datasets = list(
2322 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2323 )
2324 self.assertEqual(len(datasets), expect_len)
2326 def testTimespanQueries(self):
2327 """Test query expressions involving timespans."""
2328 registry = self.makeRegistry()
2329 self.loadData(registry, "hsc-rc2-subset.yaml")
2330 # All visits in the database; mapping from ID to timespan.
2331 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2332 # Just those IDs, sorted (which is also temporal sorting, because HSC
2333 # visit IDs are monotonically increasing).
2334 ids = sorted(visits.keys())
2335 self.assertGreater(len(ids), 20)
2336 # Pick some quasi-random indexes into `ids` to play with.
2337 i1 = int(len(ids) * 0.1)
2338 i2 = int(len(ids) * 0.3)
2339 i3 = int(len(ids) * 0.6)
2340 i4 = int(len(ids) * 0.8)
2341 # Extract some times from those: just before the beginning of i1 (which
2342 # should be after the end of the previous visit), exactly the
2343 # beginning of i2, just after the beginning of i3 (and before its end),
2344 # and the exact end of i4.
2345 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2346 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2347 t2 = visits[ids[i2]].begin
2348 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2349 self.assertLess(t3, visits[ids[i3]].end)
2350 t4 = visits[ids[i4]].end
2351 # Make sure those are actually in order.
2352 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2354 bind = {
2355 "t1": t1,
2356 "t2": t2,
2357 "t3": t3,
2358 "t4": t4,
2359 "ts23": Timespan(t2, t3),
2360 }
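# With these in `bind`, the names t1..t4 and ts23 can appear directly in
# the `where` expressions below; astropy Time values bind as time
# literals and Timespan values as timespan literals.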
2362 def query(where):
2363 """Helper function that queries for visit data IDs and returns
2364 results as a sorted, deduplicated list of visit IDs.
2365 """
2366 return sorted(
2367 {
2368 dataId["visit"]
2369 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2370 }
2371 )
2373 # Try a bunch of timespan queries, mixing up the bounds themselves,
2374 # where they appear in the expression, and how we get the timespan into
2375 # the expression.
2377 # t1 is before the start of i1, so this should not include i1.
2378 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2379 # t2 is exactly at the start of i2, but ends are exclusive, so these
2380 # should not include i2.
2381 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2382 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2383 # t3 is in the middle of i3, so this should include i3.
2384 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2385 # This one should not include i3, by the same reasoning.
2386 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2387 # t4 is exactly at the end of i4, so this should include i4.
2388 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2389 # i4's upper bound of t4 is exclusive, so this should not include i4.
2390 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2392 # Now some timespan vs. time scalar queries.
2393 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2394 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2395 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2396 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2397 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2398 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2400 # Empty timespans should not overlap anything.
2401 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2403 def testCollectionSummaries(self):
2404 """Test recording and retrieval of collection summaries."""
2405 self.maxDiff = None
2406 registry = self.makeRegistry()
2407 # Importing datasets from yaml should go through the code path where
2408 # we update collection summaries as we insert datasets.
2409 self.loadData(registry, "base.yaml")
2410 self.loadData(registry, "datasets.yaml")
2411 flat = registry.getDatasetType("flat")
2412 expected1 = CollectionSummary()
2413 expected1.dataset_types.add(registry.getDatasetType("bias"))
2414 expected1.add_data_ids(
2415 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2416 )
2417 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2418 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2419 # Create a chained collection with both of the imported runs; the
2420 # summary should be the same, because it's a union with itself.
2421 chain = "chain"
2422 registry.registerCollection(chain, CollectionType.CHAINED)
2423 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2424 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2425 # Associate flats only into a tagged collection and a calibration
2426 # collection to check summaries of those.
2427 tag = "tag"
2428 registry.registerCollection(tag, CollectionType.TAGGED)
2429 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2430 calibs = "calibs"
2431 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2432 registry.certify(
2433 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2434 )
2435 expected2 = expected1.copy()
2436 expected2.dataset_types.discard("bias")
2437 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2438 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2439 # Explicitly calling Registry.refresh() should load those same
2440 # summaries, via a totally different code path.
2441 registry.refresh()
2442 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2443 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2444 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2445 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2447 def testBindInQueryDatasets(self):
2448 """Test that the bind parameter is correctly forwarded in
2449 queryDatasets recursion.
2450 """
2451 registry = self.makeRegistry()
2452 # Load the standard test data.
2454 self.loadData(registry, "base.yaml")
2455 self.loadData(registry, "datasets.yaml")
2456 self.assertEqual(
2457 set(registry.queryDatasets("flat", band="r", collections=...)),
2458 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2459 )
2461 def testQueryResultSummaries(self):
2462 """Test summary methods like `count`, `any`, and `explain_no_results`
2463 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2464 """
2465 registry = self.makeRegistry()
2466 self.loadData(registry, "base.yaml")
2467 self.loadData(registry, "datasets.yaml")
2468 self.loadData(registry, "spatial.yaml")
2469 # Default test dataset has two collections, each with both flats and
2470 # biases. Add a new collection with only biases.
2471 registry.registerCollection("biases", CollectionType.TAGGED)
2472 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2473 # First query yields two results, and involves no postprocessing.
2474 query1 = registry.queryDataIds(["physical_filter"], band="r")
2475 self.assertTrue(query1.any(execute=False, exact=False))
2476 self.assertTrue(query1.any(execute=True, exact=False))
2477 self.assertTrue(query1.any(execute=True, exact=True))
2478 self.assertEqual(query1.count(exact=False), 2)
2479 self.assertEqual(query1.count(exact=True), 2)
2480 self.assertFalse(list(query1.explain_no_results()))
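# (execute=False, exact=False answers from the query plan alone without
# touching the database; execute=True runs a cheap probe query; and
# exact=True also applies any Python-side postprocess filtering, making
# it authoritative but potentially the most expensive.)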
2481 # Second query should yield no results, but this isn't detectable
2482 # unless we actually run a query.
2483 query2 = registry.queryDataIds(["physical_filter"], band="h")
2484 self.assertTrue(query2.any(execute=False, exact=False))
2485 self.assertFalse(query2.any(execute=True, exact=False))
2486 self.assertFalse(query2.any(execute=True, exact=True))
2487 self.assertEqual(query2.count(exact=False), 0)
2488 self.assertEqual(query2.count(exact=True), 0)
2489 self.assertFalse(list(query2.explain_no_results()))
2490 # These queries yield no results due to various problems that can be
2491 # spotted prior to execution, yielding helpful diagnostics.
2492 base_query = registry.queryDataIds(["detector", "physical_filter"])
2493 for query, snippets in [
2494 (
2495 # Dataset type name doesn't match any existing dataset types.
2496 registry.queryDatasets("nonexistent", collections=...),
2497 ["nonexistent"],
2498 ),
2499 (
2500 # Dataset type name doesn't match any existing dataset types.
2501 base_query.findDatasets("nonexistent", collections=["biases"]),
2502 ["nonexistent"],
2503 ),
2504 (
2505 # Dataset type name doesn't match any existing dataset types.
2506 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2507 ["nonexistent"],
2508 ),
2509 (
2510 # Dataset type object isn't registered.
2511 registry.queryDatasets(
2512 DatasetType(
2513 "nonexistent",
2514 dimensions=["instrument"],
2515 universe=registry.dimensions,
2516 storageClass="Image",
2517 ),
2518 collections=...,
2519 ),
2520 ["nonexistent"],
2521 ),
2522 (
2523 # Dataset type object isn't registered.
2524 base_query.findDatasets(
2525 DatasetType(
2526 "nonexistent",
2527 dimensions=["instrument"],
2528 universe=registry.dimensions,
2529 storageClass="Image",
2530 ),
2531 collections=["biases"],
2532 ),
2533 ["nonexistent"],
2534 ),
2535 (
2536 # No datasets of this type in this collection.
2537 registry.queryDatasets("flat", collections=["biases"]),
2538 ["flat", "biases"],
2539 ),
2540 (
2541 # No datasets of this type in this collection.
2542 base_query.findDatasets("flat", collections=["biases"]),
2543 ["flat", "biases"],
2544 ),
2545 (
2546 # No collections matching at all.
2547 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2548 ["potato"],
2549 ),
2550 (
2551 # Dataset type name doesn't match any existing dataset types.
2552 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2553 ["nonexistent"],
2554 ),
2555 ]:
2557 self.assertFalse(query.any(execute=False, exact=False))
2558 self.assertFalse(query.any(execute=True, exact=False))
2559 self.assertFalse(query.any(execute=True, exact=True))
2560 self.assertEqual(query.count(exact=False), 0)
2561 self.assertEqual(query.count(exact=True), 0)
2562 messages = list(query.explain_no_results())
2563 self.assertTrue(messages)
2564 # Want all expected snippets to appear in at least one message.
2565 self.assertTrue(
2566 any(
2567 all(snippet in message for snippet in snippets) for message in messages
2568 ),
2569 messages,
2570 )
2572 # These queries yield no results due to problems that can be identified
2573 # by cheap follow-up queries, yielding helpful diagnostics.
2574 for query, snippets in [
2575 (
2576 # No records for one of the involved dimensions.
2577 registry.queryDataIds(["subfilter"]),
2578 ["dimension records", "subfilter"],
2579 ),
2580 (
2581 # No records for one of the involved dimensions.
2582 registry.queryDimensionRecords("subfilter"),
2583 ["dimension records", "subfilter"],
2584 ),
2585 ]:
2586 self.assertFalse(query.any(execute=True, exact=False))
2587 self.assertFalse(query.any(execute=True, exact=True))
2588 self.assertEqual(query.count(exact=True), 0)
2589 messages = list(query.explain_no_results())
2590 self.assertTrue(messages)
2591 # Want all expected snippets to appear in at least one message.
2592 self.assertTrue(
2593 any(
2594 all(snippet in message for snippet in snippets) for message in messages
2595 ),
2596 messages,
2597 )
2599 # This query yields four overlaps in the database, but one is filtered
2600 # out in postprocessing. The count queries aren't accurate because
2601 # they don't account for duplication that happens due to an internal
2602 # join against commonSkyPix.
2603 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2604 self.assertEqual(
2605 {
2606 DataCoordinate.standardize(
2607 instrument="Cam1",
2608 skymap="SkyMap1",
2609 visit=v,
2610 tract=t,
2611 universe=registry.dimensions,
2612 )
2613 for v, t in [(1, 0), (2, 0), (2, 1)]
2614 },
2615 set(query3),
2616 )
2617 self.assertTrue(query3.any(execute=False, exact=False))
2618 self.assertTrue(query3.any(execute=True, exact=False))
2619 self.assertTrue(query3.any(execute=True, exact=True))
2620 self.assertGreaterEqual(query3.count(exact=False), 4)
2621 self.assertGreaterEqual(query3.count(exact=True), 3)
2622 self.assertFalse(list(query3.explain_no_results()))
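# To see why the counts above are inexact: the internal join against
# commonSkyPix yields one row per (visit, tract) pair per shared skypix
# pixel, so before deduplication and postprocessing the row count can
# exceed the number of distinct data IDs (here, at least 4 rows for 3
# surviving pairs).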
2623 # This query yields overlaps in the database, but all are filtered
2624 # out in postprocessing. The count queries again aren't very useful.
2625 # We have to use `where=` here to avoid an optimization that
2626 # (currently) skips the spatial postprocess-filtering because it
2627 # recognizes that no spatial join is necessary. That's not ideal, but
2628 # fixing it is out of scope for this ticket.
2629 query4 = registry.queryDataIds(
2630 ["visit", "tract"],
2631 instrument="Cam1",
2632 skymap="SkyMap1",
2633 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2634 )
2635 self.assertFalse(set(query4))
2636 self.assertTrue(query4.any(execute=False, exact=False))
2637 self.assertTrue(query4.any(execute=True, exact=False))
2638 self.assertFalse(query4.any(execute=True, exact=True))
2639 self.assertGreaterEqual(query4.count(exact=False), 1)
2640 self.assertEqual(query4.count(exact=True), 0)
2641 messages = list(query4.explain_no_results())
2642 self.assertTrue(messages)
2643 self.assertTrue(any("regions did not overlap" in message for message in messages))
2645 # Finally, some queries yield empty results for reasons we do not yet
2646 # know how to diagnose; in such cases explain_no_results() is empty.
2647 query5 = registry.queryDimensionRecords(
2648 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2649 )
2650 self.assertEqual(query5.count(exact=True), 0)
2651 messages = list(query5.explain_no_results())
2652 self.assertFalse(messages)
2654 def testQueryDataIdsOrderBy(self):
2655 """Test order_by and limit on result returned by queryDataIds()."""
2656 registry = self.makeRegistry()
2657 self.loadData(registry, "base.yaml")
2658 self.loadData(registry, "datasets.yaml")
2659 self.loadData(registry, "spatial.yaml")
2661 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2662 return registry.queryDataIds(
2663 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2664 )
2666 Test = namedtuple(
2667 "testQueryDataIdsOrderByTest",
2668 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2669 defaults=(None, None, None),
2670 )
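# namedtuple defaults apply to the rightmost fields, so limit, datasets,
# and collections all default to None here, while order_by, keys, and
# result must always be given; e.g. Test("tract", "tract", ()) is
# equivalent to Test("tract", "tract", (), None, None, None).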
2672 test_data = (
2673 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2674 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2675 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2676 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2677 Test(
2678 "tract.id,visit.id",
2679 "tract,visit",
2680 ((0, 1), (0, 1), (0, 2)),
2681 limit=(3,),
2682 ),
2683 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2684 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2685 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2686 Test(
2687 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2688 ),
2689 Test(
2690 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2691 ),
2692 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2693 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2694 Test(
2695 "tract,-timespan.begin,timespan.end",
2696 "tract,visit",
2697 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2698 ),
2699 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2700 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2701 Test(
2702 "tract,detector",
2703 "tract,detector",
2704 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2705 datasets="flat",
2706 collections="imported_r",
2707 ),
2708 Test(
2709 "tract,detector.full_name",
2710 "tract,detector",
2711 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2712 datasets="flat",
2713 collections="imported_r",
2714 ),
2715 Test(
2716 "tract,detector.raft,detector.name_in_raft",
2717 "tract,detector",
2718 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2719 datasets="flat",
2720 collections="imported_r",
2721 ),
2722 )
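# Ordering syntax recap for the cases above: a leading "-" requests
# descending order; bare dimension names ("tract", "visit") sort on the
# dimension's primary key; dotted names ("visit.exposure_time",
# "timespan.begin") sort on a metadata or timespan field; and an
# unqualified metadata name is resolved against whichever dimension
# defines it, which is why "exposure_time" alone works here.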
2724 for test in test_data:
2725 order_by = test.order_by.split(",")
2726 keys = test.keys.split(",")
2727 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2728 if test.limit is not None:
2729 query = query.limit(*test.limit)
2730 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2731 self.assertEqual(dataIds, test.result)
2733 # Repeat the query via materialize() and check the results agree.
2734 query = do_query(keys).order_by(*order_by)
2735 if test.limit is not None:
2736 query = query.limit(*test.limit)
2737 with query.materialize() as materialized:
2738 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
2739 self.assertEqual(dataIds, test.result)
2741 # Exercise error handling for invalid names in ORDER BY.
2742 for order_by in ("", "-"):
2743 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2744 list(do_query().order_by(order_by))
2746 for order_by in ("undimension.name", "-undimension.name"):
2747 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2748 list(do_query().order_by(order_by))
2750 for order_by in ("attract", "-attract"):
2751 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2752 list(do_query().order_by(order_by))
2754 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2755 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2757 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"):
2758 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2760 with self.assertRaisesRegex(
2761 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2762 ):
2763 list(do_query("tract").order_by("timespan.begin"))
2765 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2766 list(do_query("tract").order_by("tract.timespan.begin"))
2768 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2769 list(do_query("tract").order_by("tract.name"))
2771 def testQueryDataIdsGovernorExceptions(self):
2772 """Test exceptions raised by queryDataIds() for incorrect governors."""
2773 registry = self.makeRegistry()
2774 self.loadData(registry, "base.yaml")
2775 self.loadData(registry, "datasets.yaml")
2776 self.loadData(registry, "spatial.yaml")
2778 def do_query(dimensions, dataId=None, where=None, bind=None, **kwargs):
2779 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2781 Test = namedtuple(
2782 "testQueryDataIdExceptionsTest",
2783 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2784 defaults=(None, None, None, {}, None, 0),
2785 )
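# The mutable {} default for kwargs is shared across Test instances,
# which is safe here only because no test case mutates it.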
2787 test_data = (
2788 Test("tract,visit", count=6),
2789 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2790 Test(
2791 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2792 ),
2793 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2794 Test(
2795 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2796 ),
2797 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2798 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2799 Test(
2800 "tract,visit",
2801 where="instrument=cam AND skymap=map",
2802 bind={"cam": "Cam1", "map": "SkyMap1"},
2803 count=6,
2804 ),
2805 Test(
2806 "tract,visit",
2807 where="instrument=cam AND skymap=map",
2808 bind={"cam": "Cam", "map": "SkyMap"},
2809 exception=DataIdValueError,
2810 ),
2811 )
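# instrument and skymap are governor dimensions: other dimension records
# are keyed by their values, so naming a value that does not exist
# ("Cam2", "SkyMap5", ...) is reported up front as a DataIdValueError
# rather than silently producing an empty result.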
2813 for test in test_data:
2814 dimensions = test.dimensions.split(",")
2815 if test.exception:
2816 with self.assertRaises(test.exception):
2817 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2818 else:
2819 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2820 self.assertEqual(query.count(), test.count)
2822 # Repeat the same checks with materialized queries.
2823 if test.exception:
2824 with self.assertRaises(test.exception):
2825 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2826 with query.materialize() as materialized:
2827 materialized.count()
2828 else:
2829 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2830 with query.materialize() as materialized:
2831 self.assertEqual(materialized.count(), test.count)
2833 def testQueryDimensionRecordsOrderBy(self):
2834 """Test order_by and limit on result returned by
2835 queryDimensionRecords().
2836 """
2837 registry = self.makeRegistry()
2838 self.loadData(registry, "base.yaml")
2839 self.loadData(registry, "datasets.yaml")
2840 self.loadData(registry, "spatial.yaml")
2842 def do_query(element, datasets=None, collections=None):
2843 return registry.queryDimensionRecords(
2844 element, instrument="Cam1", datasets=datasets, collections=collections
2845 )
2847 query = do_query("detector")
2848 self.assertEqual(len(list(query)), 4)
2850 Test = namedtuple(
2851 "testQueryDataIdsOrderByTest",
2852 ("element", "order_by", "result", "limit", "datasets", "collections"),
2853 defaults=(None, None, None),
2854 )
2856 test_data = (
2857 Test("detector", "detector", (1, 2, 3, 4)),
2858 Test("detector", "-detector", (4, 3, 2, 1)),
2859 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2860 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2861 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2862 Test("visit", "visit", (1, 2)),
2863 Test("visit", "-visit.id", (2, 1)),
2864 Test("visit", "zenith_angle", (1, 2)),
2865 Test("visit", "-visit.name", (2, 1)),
2866 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2867 )
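# As in the data-ID ordering test, field names without an element prefix
# ("raft", "zenith_angle") are resolved against the element being
# queried, so "purpose" and "detector.purpose" order identically.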
2869 for test in test_data:
2870 order_by = test.order_by.split(",")
2871 query = do_query(test.element).order_by(*order_by)
2872 if test.limit is not None:
2873 query = query.limit(*test.limit)
2874 dataIds = tuple(rec.id for rec in query)
2875 self.assertEqual(dataIds, test.result)
2877 # Exercise error handling for invalid names in ORDER BY.
2878 for order_by in ("", "-"):
2879 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2880 list(do_query("detector").order_by(order_by))
2882 for order_by in ("undimension.name", "-undimension.name"):
2883 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
2884 list(do_query("detector").order_by(order_by))
2886 for order_by in ("attract", "-attract"):
2887 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
2888 list(do_query("detector").order_by(order_by))
2890 def testQueryDimensionRecordsExceptions(self):
2891 """Test exceptions raised by queryDimensionRecords()."""
2892 registry = self.makeRegistry()
2893 self.loadData(registry, "base.yaml")
2894 self.loadData(registry, "datasets.yaml")
2895 self.loadData(registry, "spatial.yaml")
2897 result = registry.queryDimensionRecords("detector")
2898 self.assertEqual(result.count(), 4)
2899 result = registry.queryDimensionRecords("detector", instrument="Cam1")
2900 self.assertEqual(result.count(), 4)
2901 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
2902 self.assertEqual(result.count(), 4)
2903 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
2904 self.assertEqual(result.count(), 4)
2905 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
2906 self.assertEqual(result.count(), 4)
2908 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
2909 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
2910 result.count()
2912 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
2913 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
2914 result.count()
2916 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2917 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
2918 result.count()
2920 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2921 result = registry.queryDimensionRecords(
2922 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
2923 )
2924 result.count()
2926 def testDatasetConstrainedDimensionRecordQueries(self):
2927 """Test that queryDimensionRecords works even when given a dataset
2928 constraint whose dimensions extend beyond the requested dimension
2929 element's.
2930 """
2931 registry = self.makeRegistry()
2932 self.loadData(registry, "base.yaml")
2933 self.loadData(registry, "datasets.yaml")
2934 # Query for physical_filter dimension records, constrained by a dataset
2935 # type ("flat") whose dimensions extend beyond physical_filter's.
2936 records = registry.queryDimensionRecords(
2937 "physical_filter",
2938 datasets=["flat"],
2939 collections="imported_r",
2940 )
2941 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
2942 # Trying to constrain by all dataset types is an error.
2943 with self.assertRaises(TypeError):
2944 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
2946 def testSkyPixDatasetQueries(self):
2947 """Test that we can build queries involving skypix dimensions as long
2948 as a dataset type that uses those dimensions is included.
2949 """
2950 registry = self.makeRegistry()
2951 self.loadData(registry, "base.yaml")
2952 dataset_type = DatasetType(
2953 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
2954 )
2955 registry.registerDatasetType(dataset_type)
2956 run = "r"
2957 registry.registerRun(run)
2958 # First try queries where there are no datasets; the concern is whether
2959 # we can even build and execute these queries without raising, even
2960 # when "doomed" query shortcuts are in play.
2961 self.assertFalse(
2962 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
2963 )
2964 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
2965 # Now add a dataset and see that we can get it back.
2966 htm7 = registry.dimensions.skypix["htm"][7].pixelization
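# lsst.sphgeom pixelizations expose the full sky as a RangeSet of pixel
# indices; universe()[0][0] is presumably the first index of the first
# range, i.e. an arbitrary valid htm7 pixel to attach the dataset to.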
2967 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
2968 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
2969 self.assertEqual(
2970 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
2971 {data_id},
2972 )
2973 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
2975 def testDatasetIdFactory(self):
2976 """Simple test for DatasetIdFactory, mostly to catch potential changes
2977 in its API.
2978 """
2979 registry = self.makeRegistry()
2980 factory = registry.datasetIdFactory
2981 dataset_type = DatasetType(
2982 "datasetType",
2983 dimensions=["detector", "instrument"],
2984 universe=registry.dimensions,
2985 storageClass="int",
2986 )
2987 run = "run"
2988 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
2990 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
2991 self.assertIsInstance(datasetId, uuid.UUID)
2992 self.assertEqual(datasetId.version, 4)
2994 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
2995 self.assertIsInstance(datasetId, uuid.UUID)
2996 self.assertEqual(datasetId.version, 5)
2998 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
2999 self.assertIsInstance(datasetId, uuid.UUID)
3000 self.assertEqual(datasetId.version, 5)
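# The UUID versions encode the generation strategy: UNIQUE produces a
# random version-4 UUID (different on every call), while DATAID_TYPE and
# DATAID_TYPE_RUN produce name-based version-5 UUIDs, deterministic
# functions of the dataset type and data ID (plus the run name for
# DATAID_TYPE_RUN). So, presumably, repeating a call reproduces the ID:
#   id1 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
#   id2 = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
#   assert id1 == id2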