# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """
    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """
    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()
    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested."""
        raise NotImplementedError()
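
    # A minimal sketch of a concrete subclass (hypothetical, not part of this
    # module), assuming an SQLite-backed registry and a sibling ``data``
    # directory of YAML export files; ``Registry.createFromConfig`` usage is
    # assumed from this module's `Registry` import:
    #
    #     class SQLiteRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # in-memory SQLite database
    #             return Registry.createFromConfig(config)
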
    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause which exceeds the SQLite limit on the
        # number of parameters. SQLite says the limit is 32k but it looks
        # like it is much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.obs.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )
    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)
    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )
    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
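        # (Assumed semantics, for context: DATAID_TYPE is expected to derive
        # a deterministic UUID5 from the dataset type and data ID only, while
        # DATAID_TYPE_RUN also folds in the run name; the assertions below
        # follow from that distinction.)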
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        (ref1,) = registry._importDatasets([ref])
        # Should make a new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting the same dataId with a different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting a different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        (ref4,) = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)
    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names,
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")},
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])
    def testComponentLookups(self):
        """Test searching for component datasets via their parents."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(
            registry.queryDatasets(
                "bias.wcs",
                collections=collection,
            )
        )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )
    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should be found in chain2 as
        # well, via run2 at the front of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")
    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # expression excludes everything; specifying a non-existing skymap is
        # not a fatal error, it's an operator error
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
        ).toSet()
        self.assertEqual(len(rows), 0)
    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to. We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)
    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )
1231 def testAttributeManager(self):
1232 """Test basic functionality of attribute manager."""
1233 # number of attributes with schema versions in a fresh database,
1234 # 6 managers with 3 records per manager, plus config for dimensions
1235 VERSION_COUNT = 6 * 3 + 1
1237 registry = self.makeRegistry()
1238 attributes = registry._managers.attributes
1240 # check what get() returns for non-existing key
1241 self.assertIsNone(attributes.get("attr"))
1242 self.assertEqual(attributes.get("attr", ""), "")
1243 self.assertEqual(attributes.get("attr", "Value"), "Value")
1244 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1246 # cannot store empty key or value
1247 with self.assertRaises(ValueError):
1248 attributes.set("", "value")
1249 with self.assertRaises(ValueError):
1250 attributes.set("attr", "")
1252 # set value of non-existing key
1253 attributes.set("attr", "value")
1254 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1255 self.assertEqual(attributes.get("attr"), "value")
1257 # update value of existing key
1258 with self.assertRaises(ButlerAttributeExistsError):
1259 attributes.set("attr", "value2")
1261 attributes.set("attr", "value2", force=True)
1262 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1263 self.assertEqual(attributes.get("attr"), "value2")
1265 # delete existing key
1266 self.assertTrue(attributes.delete("attr"))
1267 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1269 # delete non-existing key
1270 self.assertFalse(attributes.delete("non-attr"))
1272 # store a bunch of keys and get the list back
1273 data = [
1274 ("version.core", "1.2.3"),
1275 ("version.dimensions", "3.2.1"),
1276 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1277 ]
1278 for key, value in data:
1279 attributes.set(key, value)
1280 items = dict(attributes.items())
1281 for key, value in data:
1282 self.assertEqual(items[key], value)
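        # The manager behaves like a persistent str-to-str mapping; a minimal
        # sketch of an upsert helper built on the calls exercised above (the
        # helper itself is hypothetical, not part of the manager interface):
        def upsert(key, value):
            attributes.set(key, value, force=True)
            return attributes.get(key)

        self.assertEqual(upsert("version.core", "1.2.4"), "1.2.4")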
1284 def testQueryDatasetsDeduplication(self):
1285 """Test that the findFirst option to queryDatasets selects datasets
1286 from collections in the order given.
1287 """
1288 registry = self.makeRegistry()
1289 self.loadData(registry, "base.yaml")
1290 self.loadData(registry, "datasets.yaml")
1291 self.assertCountEqual(
1292 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1293 [
1294 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1295 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1296 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1297 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1298 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1299 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1300 ],
1301 )
1302 self.assertCountEqual(
1303 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1304 [
1305 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1306 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1307 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1308 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1309 ],
1310 )
1311 self.assertCountEqual(
1312 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1313 [
1314 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1315 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1316 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1317 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1318 ],
1319 )
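        # The findFirst semantics asserted above can be modeled in plain
        # Python: for each data ID, keep the dataset from the first
        # collection (in search order) that has one. A minimal sketch with
        # stand-in values (illustrative only, not the registry's
        # implementation):
        def find_first(per_collection, search_order):
            results = {}
            for collection in search_order:
                for data_id, ref in per_collection[collection].items():
                    results.setdefault(data_id, ref)
            return results

        per_collection = {
            "imported_g": {1: "g1", 2: "g2", 3: "g3"},
            "imported_r": {2: "r2", 3: "r3", 4: "r4"},
        }
        self.assertEqual(
            find_first(per_collection, ["imported_g", "imported_r"]),
            {1: "g1", 2: "g2", 3: "g3", 4: "r4"},
        )
        self.assertEqual(
            find_first(per_collection, ["imported_r", "imported_g"]),
            {1: "g1", 2: "r2", 3: "r3", 4: "r4"},
        )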
1321 def testQueryResults(self):
1322 """Test querying for data IDs and then manipulating the QueryResults
1323 object returned to perform other queries.
1324 """
1325 registry = self.makeRegistry()
1326 self.loadData(registry, "base.yaml")
1327 self.loadData(registry, "datasets.yaml")
1328 bias = registry.getDatasetType("bias")
1329 flat = registry.getDatasetType("flat")
1330 # Obtain expected results from methods other than those we're testing
1331 # here. That includes:
1332 # - the dimensions of the data IDs we want to query:
1333 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1334 # - the dimensions of some other data IDs we'll extract from that:
1335 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1336 # - the data IDs we expect to obtain from the first queries:
1337 expectedDataIds = DataCoordinateSet(
1338 {
1339 DataCoordinate.standardize(
1340 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1341 )
1342 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1343 },
1344 graph=expectedGraph,
1345 hasFull=False,
1346 hasRecords=False,
1347 )
1348 # - the flat datasets we expect to find from those data IDs, in just
1349 # one collection (so deduplication is irrelevant):
1350 expectedFlats = [
1351 registry.findDataset(
1352 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1353 ),
1354 registry.findDataset(
1355 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1356 ),
1357 registry.findDataset(
1358 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1359 ),
1360 ]
1361 # - the data IDs we expect to extract from that:
1362 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1363 # - the bias datasets we expect to find from those data IDs, after we
1364 # subset out the physical_filter dimension, first with duplicates:
1365 expectedAllBiases = [
1366 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1367 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1368 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1369 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1370 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1371 ]
1372 # - ...and without duplicates:
1373 expectedDeduplicatedBiases = [
1374 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1375 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1376 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1377 ]
1378 # Test against those expected results, using a "lazy" query for the
1379 # data IDs (which re-executes that query each time we use it to do
1380 # something new).
1381 dataIds = registry.queryDataIds(
1382 ["detector", "physical_filter"],
1383 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1384 instrument="Cam1",
1385 )
1386 self.assertEqual(dataIds.graph, expectedGraph)
1387 self.assertEqual(dataIds.toSet(), expectedDataIds)
1388 self.assertCountEqual(
1389 list(
1390 dataIds.findDatasets(
1391 flat,
1392 collections=["imported_r"],
1393 )
1394 ),
1395 expectedFlats,
1396 )
1397 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1398 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1399 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1400 self.assertCountEqual(
1401 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1402 expectedAllBiases,
1403 )
1404 self.assertCountEqual(
1405 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1406 expectedDeduplicatedBiases,
1407 )
1408 # Materialize the bias dataset queries (only) by putting the results
1409 # into temporary tables, then repeat those tests.
1410 with subsetDataIds.findDatasets(
1411 bias, collections=["imported_r", "imported_g"], findFirst=False
1412 ).materialize() as biases:
1413 self.assertCountEqual(list(biases), expectedAllBiases)
1414 with subsetDataIds.findDatasets(
1415 bias, collections=["imported_r", "imported_g"], findFirst=True
1416 ).materialize() as biases:
1417 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1418 # Materialize the data ID subset query, but not the dataset queries.
1419 with subsetDataIds.materialize() as subsetDataIds:
1420 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1421 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1422 self.assertCountEqual(
1423 list(
1424 subsetDataIds.findDatasets(
1425 bias, collections=["imported_r", "imported_g"], findFirst=False
1426 )
1427 ),
1428 expectedAllBiases,
1429 )
1430 self.assertCountEqual(
1431 list(
1432 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1433 ),
1434 expectedDeduplicatedBiases,
1435 )
1436 # Materialize the dataset queries, too.
1437 with subsetDataIds.findDatasets(
1438 bias, collections=["imported_r", "imported_g"], findFirst=False
1439 ).materialize() as biases:
1440 self.assertCountEqual(list(biases), expectedAllBiases)
1441 with subsetDataIds.findDatasets(
1442 bias, collections=["imported_r", "imported_g"], findFirst=True
1443 ).materialize() as biases:
1444 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1445 # Materialize the original query, but none of the follow-up queries.
1446 with dataIds.materialize() as dataIds:
1447 self.assertEqual(dataIds.graph, expectedGraph)
1448 self.assertEqual(dataIds.toSet(), expectedDataIds)
1449 self.assertCountEqual(
1450 list(
1451 dataIds.findDatasets(
1452 flat,
1453 collections=["imported_r"],
1454 )
1455 ),
1456 expectedFlats,
1457 )
1458 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1459 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1460 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1461 self.assertCountEqual(
1462 list(
1463 subsetDataIds.findDatasets(
1464 bias, collections=["imported_r", "imported_g"], findFirst=False
1465 )
1466 ),
1467 expectedAllBiases,
1468 )
1469 self.assertCountEqual(
1470 list(
1471 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1472 ),
1473 expectedDeduplicatedBiases,
1474 )
1475 # Materialize just the bias dataset queries.
1476 with subsetDataIds.findDatasets(
1477 bias, collections=["imported_r", "imported_g"], findFirst=False
1478 ).materialize() as biases:
1479 self.assertCountEqual(list(biases), expectedAllBiases)
1480 with subsetDataIds.findDatasets(
1481 bias, collections=["imported_r", "imported_g"], findFirst=True
1482 ).materialize() as biases:
1483 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1484 # Materialize the subset data ID query, but not the dataset
1485 # queries.
1486 with subsetDataIds.materialize() as subsetDataIds:
1487 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1488 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1489 self.assertCountEqual(
1490 list(
1491 subsetDataIds.findDatasets(
1492 bias, collections=["imported_r", "imported_g"], findFirst=False
1493 )
1494 ),
1495 expectedAllBiases,
1496 )
1497 self.assertCountEqual(
1498 list(
1499 subsetDataIds.findDatasets(
1500 bias, collections=["imported_r", "imported_g"], findFirst=True
1501 )
1502 ),
1503 expectedDeduplicatedBiases,
1504 )
1505 # Materialize the bias dataset queries, too, so now we're
1506 # materializing every single step.
1507 with subsetDataIds.findDatasets(
1508 bias, collections=["imported_r", "imported_g"], findFirst=False
1509 ).materialize() as biases:
1510 self.assertCountEqual(list(biases), expectedAllBiases)
1511 with subsetDataIds.findDatasets(
1512 bias, collections=["imported_r", "imported_g"], findFirst=True
1513 ).materialize() as biases:
1514 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
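        # materialize() exists so that follow-up queries read from a
        # temporary table instead of re-executing the original query; a
        # minimal, self-contained sketch of the pattern:
        with registry.queryDataIds(["detector"], instrument="Cam1").materialize() as detectors:
            self.assertTrue(detectors.any(execute=True, exact=True))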
1516 def testEmptyDimensionsQueries(self):
1517 """Test Query and QueryResults objects in the case where there are no
1518 dimensions.
1519 """
1520 # Set up test data: one dataset type, two runs, one dataset in each.
1521 registry = self.makeRegistry()
1522 self.loadData(registry, "base.yaml")
1523 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1524 registry.registerDatasetType(schema)
1525 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1526 run1 = "run1"
1527 run2 = "run2"
1528 registry.registerRun(run1)
1529 registry.registerRun(run2)
1530 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1531 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1532 # Query directly for both of the datasets, then for each one individually.
1533 self.checkQueryResults(
1534 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1535 )
1536 self.checkQueryResults(
1537 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1538 [dataset1],
1539 )
1540 self.checkQueryResults(
1541 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1542 [dataset2],
1543 )
1544 # Query for data IDs with no dimensions.
1545 dataIds = registry.queryDataIds([])
1546 self.checkQueryResults(dataIds, [dataId])
1547 # Use queried data IDs to find the datasets.
1548 self.checkQueryResults(
1549 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1550 [dataset1, dataset2],
1551 )
1552 self.checkQueryResults(
1553 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1554 [dataset1],
1555 )
1556 self.checkQueryResults(
1557 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1558 [dataset2],
1559 )
1560 # Now materialize the data ID query results and repeat those tests.
1561 with dataIds.materialize() as dataIds:
1562 self.checkQueryResults(dataIds, [dataId])
1563 self.checkQueryResults(
1564 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1565 [dataset1],
1566 )
1567 self.checkQueryResults(
1568 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1569 [dataset2],
1570 )
1571 # Query for non-empty data IDs, then subset that to get the empty one.
1572 # Repeat the above tests starting from that.
1573 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1574 self.checkQueryResults(dataIds, [dataId])
1575 self.checkQueryResults(
1576 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1577 [dataset1, dataset2],
1578 )
1579 self.checkQueryResults(
1580 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1581 [dataset1],
1582 )
1583 self.checkQueryResults(
1584 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1585 [dataset2],
1586 )
1587 with dataIds.materialize() as dataIds:
1588 self.checkQueryResults(dataIds, [dataId])
1589 self.checkQueryResults(
1590 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1591 [dataset1, dataset2],
1592 )
1593 self.checkQueryResults(
1594 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1595 [dataset1],
1596 )
1597 self.checkQueryResults(
1598 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1599 [dataset2],
1600 )
1601 # Query for non-empty data IDs, then materialize, then subset to get
1602 # the empty one. Repeat again.
1603 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1604 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1605 self.checkQueryResults(dataIds, [dataId])
1606 self.checkQueryResults(
1607 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1608 [dataset1, dataset2],
1609 )
1610 self.checkQueryResults(
1611 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1612 [dataset1],
1613 )
1614 self.checkQueryResults(
1615 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1616 [dataset2],
1617 )
1618 with dataIds.materialize() as dataIds:
1619 self.checkQueryResults(dataIds, [dataId])
1620 self.checkQueryResults(
1621 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1622 [dataset1, dataset2],
1623 )
1624 self.checkQueryResults(
1625 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1626 [dataset1],
1627 )
1628 self.checkQueryResults(
1629 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1630 [dataset2],
1631 )
1633 def testDimensionDataModifications(self):
1634 """Test that modifying dimension records via:
1635 syncDimensionData(..., update=True) and
1636 insertDimensionData(..., replace=True) works as expected, even in the
1637 presence of datasets using those dimensions and spatial overlap
1638 relationships.
1639 """
1641 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1642 """Unpack a sphgeom.RangeSet into the integers it contains."""
1643 for begin, end in ranges:
1644 yield from range(begin, end)
1646 def range_set_hull(
1647 ranges: lsst.sphgeom.RangeSet,
1648 pixelization: lsst.sphgeom.HtmPixelization,
1649 ) -> lsst.sphgeom.ConvexPolygon:
1650 """Create a ConvexPolygon hull of the region defined by a set of
1651 HTM pixelization index ranges.
1652 """
1653 points = []
1654 for index in unpack_range_set(ranges):
1655 points.extend(pixelization.triangle(index).getVertices())
1656 return lsst.sphgeom.ConvexPolygon(points)
1658 # Use HTM to set up an initial parent region (one arbitrary trixel)
1659 # and four child regions (the trixels within the parent at the next
1660 # level). We'll use the parent as a tract/visit region and the children
1661 # as its patch/visit_detector regions.
1662 registry = self.makeRegistry()
1663 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1664 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1665 index = 12288
1666 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1667 assert htm6.universe().contains(child_ranges_small)
1668 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1669 parent_region_small = lsst.sphgeom.ConvexPolygon(
1670 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1671 )
1672 assert all(parent_region_small.contains(c) for c in child_regions_small)
1673 # Make a larger version of each child region, defined to be the set of
1674 # htm6 trixels that overlap the original's bounding circle. Make a new
1675 # parent that's the convex hull of the new children.
1676 child_regions_large = [
1677 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1678 ]
1679 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1680 parent_region_large = lsst.sphgeom.ConvexPolygon(
1681 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1682 )
1683 assert all(parent_region_large.contains(c) for c in child_regions_large)
1684 assert parent_region_large.contains(parent_region_small)
1685 assert not parent_region_small.contains(parent_region_large)
1686 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1687 # Find some commonSkyPix indices that overlap the large regions but do
1688 # not overlap the small regions. We use commonSkyPix here to make sure the
1689 # real tests later involve what's in the database, not just post-query
1690 # region filtering.
1691 child_difference_indices = []
1692 for large, small in zip(child_regions_large, child_regions_small):
1693 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1694 assert difference, "if this is empty, we can't test anything useful with these regions"
1695 assert all(
1696 not commonSkyPix.triangle(d).isDisjointFrom(large)
1697 and commonSkyPix.triangle(d).isDisjointFrom(small)
1698 for d in difference
1699 )
1700 child_difference_indices.append(difference)
1701 parent_difference_indices = list(
1702 unpack_range_set(
1703 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1704 )
1705 )
1706 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1707 assert all(
1708 (
1709 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1710 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1711 )
1712 for d in parent_difference_indices
1713 )
1714 # Now that we've finally got those regions, we'll insert the large ones
1715 # as tract/patch dimension records.
1716 skymap_name = "testing_v1"
1717 registry.insertDimensionData(
1718 "skymap",
1719 {
1720 "name": skymap_name,
1721 "hash": bytes([42]),
1722 "tract_max": 1,
1723 "patch_nx_max": 2,
1724 "patch_ny_max": 2,
1725 },
1726 )
1727 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1728 registry.insertDimensionData(
1729 "patch",
1730 *[
1731 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1732 for n, c in enumerate(child_regions_large)
1733 ],
1734 )
1735 # Add a dataset that uses these dimensions to make sure that modifying
1736 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1737 # implement insert with replace=True as delete-then-insert).
1738 dataset_type = DatasetType(
1739 "coadd",
1740 dimensions=["tract", "patch"],
1741 universe=registry.dimensions,
1742 storageClass="Exposure",
1743 )
1744 registry.registerDatasetType(dataset_type)
1745 registry.registerCollection("the_run", CollectionType.RUN)
1746 registry.insertDatasets(
1747 dataset_type,
1748 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1749 run="the_run",
1750 )
1751 # Query for tracts and patches that overlap some "difference" commonSkyPix
1752 # pixels; there should be overlaps, because the database has
1753 # the "large" suite of regions.
1754 self.assertEqual(
1755 {0},
1756 {
1757 data_id["tract"]
1758 for data_id in registry.queryDataIds(
1759 ["tract"],
1760 skymap=skymap_name,
1761 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1762 )
1763 },
1764 )
1765 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1766 self.assertIn(
1767 patch_id,
1768 {
1769 data_id["patch"]
1770 for data_id in registry.queryDataIds(
1771 ["patch"],
1772 skymap=skymap_name,
1773 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1774 )
1775 },
1776 )
1777 # Use sync to update the tract region and insert to update the patch
1778 # regions, to the "small" suite.
1779 updated = registry.syncDimensionData(
1780 "tract",
1781 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1782 update=True,
1783 )
1784 self.assertEqual(updated, {"region": parent_region_large})
1785 registry.insertDimensionData(
1786 "patch",
1787 *[
1788 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1789 for n, c in enumerate(child_regions_small)
1790 ],
1791 replace=True,
1792 )
1793 # Query again; there now should be no such overlaps, because the
1794 # database has the "small" suite of regions.
1795 self.assertFalse(
1796 set(
1797 registry.queryDataIds(
1798 ["tract"],
1799 skymap=skymap_name,
1800 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1801 )
1802 )
1803 )
1804 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1805 self.assertNotIn(
1806 patch_id,
1807 {
1808 data_id["patch"]
1809 for data_id in registry.queryDataIds(
1810 ["patch"],
1811 skymap=skymap_name,
1812 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1813 )
1814 },
1815 )
1816 # Update back to the large regions and query one more time.
1817 updated = registry.syncDimensionData(
1818 "tract",
1819 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1820 update=True,
1821 )
1822 self.assertEqual(updated, {"region": parent_region_small})
1823 registry.insertDimensionData(
1824 "patch",
1825 *[
1826 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1827 for n, c in enumerate(child_regions_large)
1828 ],
1829 replace=True,
1830 )
1831 self.assertEqual(
1832 {0},
1833 {
1834 data_id["tract"]
1835 for data_id in registry.queryDataIds(
1836 ["tract"],
1837 skymap=skymap_name,
1838 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1839 )
1840 },
1841 )
1842 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1843 self.assertIn(
1844 patch_id,
1845 {
1846 data_id["patch"]
1847 for data_id in registry.queryDataIds(
1848 ["patch"],
1849 skymap=skymap_name,
1850 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1851 )
1852 },
1853 )
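        # For reference, syncDimensionData(..., update=True) reports what it
        # changed, so syncing a record that already matches should change
        # nothing. A minimal re-check of the state we just restored (assumes
        # a falsy return when no update occurs, by analogy with the dict
        # returns asserted above):
        self.assertFalse(
            registry.syncDimensionData(
                "tract",
                {"skymap": skymap_name, "id": 0, "region": parent_region_large},
                update=True,
            )
        )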
1855 def testCalibrationCollections(self):
1856 """Test operations on `~CollectionType.CALIBRATION` collections,
1857 including `Registry.certify`, `Registry.decertify`, and
1858 `Registry.findDataset`.
1859 """
1860 # Setup - make a Registry, fill it with some datasets in
1861 # non-calibration collections.
1862 registry = self.makeRegistry()
1863 self.loadData(registry, "base.yaml")
1864 self.loadData(registry, "datasets.yaml")
1865 # Set up some timestamps.
1866 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
1867 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
1868 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
1869 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
1870 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
1871 allTimespans = [
1872 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1873 ]
1874 # Get references to some datasets.
1875 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1876 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1877 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1878 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1879 # Register the main calibration collection we'll be working with.
1880 collection = "Cam1/calibs/default"
1881 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1882 # Cannot associate into a calibration collection (no timespan).
1883 with self.assertRaises(TypeError):
1884 registry.associate(collection, [bias2a])
1885 # Certify 2a dataset with [t2, t4) validity.
1886 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1887 # We should not be able to certify 2b with anything overlapping that
1888 # window.
1889 with self.assertRaises(ConflictingDefinitionError):
1890 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1891 with self.assertRaises(ConflictingDefinitionError):
1892 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1893 with self.assertRaises(ConflictingDefinitionError):
1894 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1895 with self.assertRaises(ConflictingDefinitionError):
1896 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1897 with self.assertRaises(ConflictingDefinitionError):
1898 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1899 with self.assertRaises(ConflictingDefinitionError):
1900 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1901 with self.assertRaises(ConflictingDefinitionError):
1902 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1903 with self.assertRaises(ConflictingDefinitionError):
1904 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1905 # We should be able to certify 3a with a range overlapping that window,
1906 # because it's for a different detector.
1907 # We'll certify 3a over [t1, t3).
1908 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1909 # Now we'll certify 2b and 3b together over [t4, ∞).
1910 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1912 # Fetch all associations and check that they are what we expect.
1913 self.assertCountEqual(
1914 list(
1915 registry.queryDatasetAssociations(
1916 "bias",
1917 collections=[collection, "imported_g", "imported_r"],
1918 )
1919 ),
1920 [
1921 DatasetAssociation(
1922 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1923 collection="imported_g",
1924 timespan=None,
1925 ),
1926 DatasetAssociation(
1927 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1928 collection="imported_r",
1929 timespan=None,
1930 ),
1931 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1932 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1933 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1934 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1935 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1936 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1937 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1938 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1939 ],
1940 )
1942 class Ambiguous:
1943 """Tag class to denote lookups that should be ambiguous."""
1945 pass
1947 def assertLookup(
1948 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
1949 ) -> None:
1950 """Local function that asserts that a bias lookup returns the given
1951 expected result.
1952 """
1953 if expected is Ambiguous:
1954 with self.assertRaises(RuntimeError):
1955 registry.findDataset(
1956 "bias",
1957 collections=collection,
1958 instrument="Cam1",
1959 detector=detector,
1960 timespan=timespan,
1961 )
1962 else:
1963 self.assertEqual(
1964 expected,
1965 registry.findDataset(
1966 "bias",
1967 collections=collection,
1968 instrument="Cam1",
1969 detector=detector,
1970 timespan=timespan,
1971 ),
1972 )
1974 # Systematically test lookups against expected results.
1975 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1976 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1977 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1978 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1979 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1980 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1981 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1982 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1983 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1984 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1985 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1986 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1987 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1988 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1989 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1990 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1991 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1992 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1993 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1994 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1995 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1996 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1997 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1998 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1999 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2000 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2001 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2002 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2003 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2004 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2005 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2006 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2007 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2008 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2009 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2010 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2011 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2012 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2013 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2014 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2015 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2016 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2018 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2019 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2020 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2021 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2022 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2023 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2024 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2025 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2026 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2027 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2028 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2029 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2030 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2031 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2032 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2033 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2034 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2035 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2036 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2037 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2038 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2039 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2040 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2041 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2042 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2043 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2044 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2045 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2046 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2047 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2048 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2049 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2050 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2051 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2052 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2053 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2054 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2055 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2056 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2057 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2058 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2059 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2060 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2061 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2062 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2063 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2065 # Decertify everything, this time with explicit data IDs, then check
2066 # that no lookups succeed.
2067 registry.decertify(
2068 collection,
2069 "bias",
2070 Timespan(None, None),
2071 dataIds=[
2072 dict(instrument="Cam1", detector=2),
2073 dict(instrument="Cam1", detector=3),
2074 ],
2075 )
2076 for detector in (2, 3):
2077 for timespan in allTimespans:
2078 assertLookup(detector=detector, timespan=timespan, expected=None)
2079 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2080 # those.
2081 registry.certify(
2082 collection,
2083 [bias2a, bias3a],
2084 Timespan(None, None),
2085 )
2086 for timespan in allTimespans:
2087 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2088 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2089 # Decertify just bias2a over [t2, t4).
2090 # This should split a single certification row into two (and leave the
2091 # other existing row, for bias3a, alone).
2092 registry.decertify(
2093 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2094 )
2095 for timespan in allTimespans:
2096 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2097 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2098 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2099 if overlapsBefore and overlapsAfter:
2100 expected = Ambiguous
2101 elif overlapsBefore or overlapsAfter:
2102 expected = bias2a
2103 else:
2104 expected = None
2105 assertLookup(detector=2, timespan=timespan, expected=expected)
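        # The decertify above splits one unbounded validity range into two; a
        # minimal pure-Python sketch of that interval arithmetic, using None
        # for unbounded endpoints (illustrative only, not the registry's
        # implementation):
        def split_validity(begin, end, cut_begin, cut_end):
            # Assumes the cut lies strictly inside [begin, end).
            pieces = []
            if begin is None or begin < cut_begin:
                pieces.append((begin, cut_begin))
            if end is None or end > cut_end:
                pieces.append((cut_end, end))
            return pieces

        self.assertEqual(split_validity(None, None, t2, t4), [(None, t2), (t4, None)])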
2107 def testSkipCalibs(self):
2108 """Test how queries handle skipping of calibration collections."""
2109 registry = self.makeRegistry()
2110 self.loadData(registry, "base.yaml")
2111 self.loadData(registry, "datasets.yaml")
2113 coll_calib = "Cam1/calibs/default"
2114 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2116 # Add all biases to the calibration collection.
2117 # Without this, the logic that prunes dataset subqueries based on
2118 # datasetType-collection summary information will fire before the logic
2119 # we want to test below. This is a good thing (it avoids the dreaded
2120 # NotImplementedError a bit more often) everywhere but here.
2121 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2123 coll_list = [coll_calib, "imported_g", "imported_r"]
2124 chain = "Cam1/chain"
2125 registry.registerCollection(chain, type=CollectionType.CHAINED)
2126 registry.setCollectionChain(chain, coll_list)
2128 # explicit list will raise if findFirst=True or there are temporal
2129 # dimensions
2130 with self.assertRaises(NotImplementedError):
2131 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2132 with self.assertRaises(NotImplementedError):
2133 registry.queryDataIds(
2134 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2135 ).count()
2137 # chain will skip
2138 datasets = list(registry.queryDatasets("bias", collections=chain))
2139 self.assertGreater(len(datasets), 0)
2141 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2142 self.assertGreater(len(dataIds), 0)
2144 # glob will skip too
2145 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2146 self.assertGreater(len(datasets), 0)
2148 # regular expression will skip too
2149 pattern = re.compile(".*")
2150 datasets = list(registry.queryDatasets("bias", collections=pattern))
2151 self.assertGreater(len(datasets), 0)
2153 # ellipsis should work as usual
2154 datasets = list(registry.queryDatasets("bias", collections=...))
2155 self.assertGreater(len(datasets), 0)
2157 # few tests with findFirst
2158 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2159 self.assertGreater(len(datasets), 0)
2161 def testIngestTimeQuery(self):
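        """Test that ingest_date is queryable via 'where' expressions."""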
2163 registry = self.makeRegistry()
2164 self.loadData(registry, "base.yaml")
2165 dt0 = datetime.utcnow()
2166 self.loadData(registry, "datasets.yaml")
2167 dt1 = datetime.utcnow()
2169 datasets = list(registry.queryDatasets(..., collections=...))
2170 len0 = len(datasets)
2171 self.assertGreater(len0, 0)
2173 where = "ingest_date > T'2000-01-01'"
2174 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2175 len1 = len(datasets)
2176 self.assertEqual(len0, len1)
2178 # no one will ever use this piece of software in 30 years
2179 where = "ingest_date > T'2050-01-01'"
2180 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2181 len2 = len(datasets)
2182 self.assertEqual(len2, 0)
2184 # Check more exact timing to make sure there is no 37-second offset
2185 # (after fixing DM-30124). SQLite time precision is 1 second; make
2186 # sure that we don't test with higher precision.
2187 tests = [
2188 # format: (timestamp, operator, expected_len)
2189 (dt0 - timedelta(seconds=1), ">", len0),
2190 (dt0 - timedelta(seconds=1), "<", 0),
2191 (dt1 + timedelta(seconds=1), "<", len0),
2192 (dt1 + timedelta(seconds=1), ">", 0),
2193 ]
2194 for dt, op, expect_len in tests:
2195 dt_str = dt.isoformat(sep=" ")
2197 where = f"ingest_date {op} T'{dt_str}'"
2198 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2199 self.assertEqual(len(datasets), expect_len)
2201 # same with bind using datetime or astropy Time
2202 where = f"ingest_date {op} ingest_time"
2203 datasets = list(
2204 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2205 )
2206 self.assertEqual(len(datasets), expect_len)
2208 dt_astropy = astropy.time.Time(dt, format="datetime")
2209 datasets = list(
2210 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2211 )
2212 self.assertEqual(len(datasets), expect_len)
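        # For reference, the T'...' literals above are just timestamps
        # embedded in the expression string; a minimal sketch of building one
        # from a datetime, as done for dt_str above:
        literal = f"T'{dt1.isoformat(sep=' ')}'"
        self.assertTrue(literal.startswith("T'") and literal.endswith("'"))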
2214 def testTimespanQueries(self):
2215 """Test query expressions involving timespans."""
2216 registry = self.makeRegistry()
2217 self.loadData(registry, "hsc-rc2-subset.yaml")
2218 # All visits in the database; mapping from ID to timespan.
2219 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2220 # Just those IDs, sorted (which is also temporal sorting, because HSC
2221 # exposure IDs are monotonically increasing).
2222 ids = sorted(visits.keys())
2223 self.assertGreater(len(ids), 20)
2224 # Pick some quasi-random indexes into `ids` to play with.
2225 i1 = int(len(ids) * 0.1)
2226 i2 = int(len(ids) * 0.3)
2227 i3 = int(len(ids) * 0.6)
2228 i4 = int(len(ids) * 0.8)
2229 # Extract some times from those: just before the beginning of i1 (which
2230 # should be after the end of the previous visit), exactly the
2231 # beginning of i2, just after the beginning of i3 (and before its end),
2232 # and the exact end of i4.
2233 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2234 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2235 t2 = visits[ids[i2]].begin
2236 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2237 self.assertLess(t3, visits[ids[i3]].end)
2238 t4 = visits[ids[i4]].end
2239 # Make sure those are actually in order.
2240 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2242 bind = {
2243 "t1": t1,
2244 "t2": t2,
2245 "t3": t3,
2246 "t4": t4,
2247 "ts23": Timespan(t2, t3),
2248 }
2250 def query(where):
2251 """Helper function that queries for visit data IDs and returns
2252 results as a sorted, deduplicated list of visit IDs.
2253 """
2254 return sorted(
2255 {
2256 dataId["visit"]
2257 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2258 }
2259 )
2261 # Try a bunch of timespan queries, mixing up the bounds themselves,
2262 # where they appear in the expression, and how we get the timespan into
2263 # the expression.
2265 # t1 is before the start of i1, so this should not include i1.
2266 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2267 # t2 is exactly at the start of i2, but ends are exclusive, so these
2268 # should not include i2.
2269 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2270 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2271 # t3 is in the middle of i3, so this should include i3.
2272 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2274 # This one should not include i3, by the same reasoning.
2274 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2275 # t4 is exactly at the end of i4, so this should include i4.
2276 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2277 # i4's upper bound of t4 is exclusive, so this should not include i4.
2278 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2280 # Now some timespan vs. time scalar queries.
2281 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2282 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2283 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2284 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2285 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2286 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2288 # Empty timespans should not overlap anything.
2289 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
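        # The half-open [begin, end) semantics asserted above can also be
        # checked directly on Timespan; a minimal sketch:
        self.assertFalse(Timespan(t1, t2).overlaps(Timespan(t2, t3)))  # ends are exclusive
        self.assertTrue(Timespan(t1, t3).overlaps(Timespan(t2, t4)))  # interiors intersect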
2291 def testCollectionSummaries(self):
2292 """Test recording and retrieval of collection summaries."""
2293 self.maxDiff = None
2294 registry = self.makeRegistry()
2295 # Importing datasets from yaml should go through the code path where
2296 # we update collection summaries as we insert datasets.
2297 self.loadData(registry, "base.yaml")
2298 self.loadData(registry, "datasets.yaml")
2299 flat = registry.getDatasetType("flat")
2300 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
2301 expected1.datasetTypes.add(registry.getDatasetType("bias"))
2302 expected1.datasetTypes.add(flat)
2303 expected1.dimensions.update_extract(
2304 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
2305 )
2306 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2307 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2308 # Create a chained collection with both of the imported runs; the
2309 # summary should be the same, because it's a union with itself.
2310 chain = "chain"
2311 registry.registerCollection(chain, CollectionType.CHAINED)
2312 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2313 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2314 # Associate flats only into a tagged collection and a calibration
2315 # collection to check summaries of those.
2316 tag = "tag"
2317 registry.registerCollection(tag, CollectionType.TAGGED)
2318 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2319 calibs = "calibs"
2320 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2321 registry.certify(
2322 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2323 )
2324 expected2 = expected1.copy()
2325 expected2.datasetTypes.discard("bias")
2326 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2327 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2328 # Explicitly calling Registry.refresh() should load those same
2329 # summaries, via a totally different code path.
2330 registry.refresh()
2331 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2332 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2333 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2334 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2336 def testUnrelatedDimensionQueries(self):
2337 """Test that WHERE expressions in queries can reference dimensions that
2338 are not in the result set.
2339 """
2340 registry = self.makeRegistry()
2341 # There is no data to back this query, but it should still return
2342 # zero records instead of raising.
2343 self.assertFalse(
2344 set(
2345 registry.queryDataIds(
2346 ["visit", "detector"], where="instrument='Cam1' AND skymap='not_here' AND tract=0"
2347 )
2348 ),
2349 )
2351 def testBindInQueryDatasets(self):
2352 """Test that the bind parameter is correctly forwarded in
2353 queryDatasets recursion.
2354 """
2355 registry = self.makeRegistry()
2356 # Importing datasets from yaml should go through the code path where
2357 # we update collection summaries as we insert datasets.
2358 self.loadData(registry, "base.yaml")
2359 self.loadData(registry, "datasets.yaml")
2360 self.assertEqual(
2361 set(registry.queryDatasets("flat", band="r", collections=...)),
2362 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2363 )
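        # bind exists to keep literal values out of the expression string;
        # inlining the literal is equivalent (a quick sketch using the same
        # expression syntax as elsewhere in these tests):
        self.assertEqual(
            set(registry.queryDatasets("flat", where="band='r'", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
        )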
2365 def testQueryResultSummaries(self):
2366 """Test summary methods like `count`, `any`, and `explain_no_results`
2367 on `DataCoordinateQueryResults` and `DatasetQueryResults`
2368 """
2369 registry = self.makeRegistry()
2370 self.loadData(registry, "base.yaml")
2371 self.loadData(registry, "datasets.yaml")
2372 self.loadData(registry, "spatial.yaml")
2373 # Default test dataset has two collections, each with both flats and
2374 # biases. Add a new collection with only biases.
2375 registry.registerCollection("biases", CollectionType.TAGGED)
2376 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2377 # First query yields two results, and involves no postprocessing.
2378 query1 = registry.queryDataIds(["physical_filter"], band="r")
2379 self.assertTrue(query1.any(execute=False, exact=False))
2380 self.assertTrue(query1.any(execute=True, exact=False))
2381 self.assertTrue(query1.any(execute=True, exact=True))
2382 self.assertEqual(query1.count(exact=False), 2)
2383 self.assertEqual(query1.count(exact=True), 2)
2384 self.assertFalse(list(query1.explain_no_results()))
2385 # Second query should yield no results, but this isn't detectable
2386 # unless we actually run a query.
2387 query2 = registry.queryDataIds(["physical_filter"], band="h")
2388 self.assertTrue(query2.any(execute=False, exact=False))
2389 self.assertFalse(query2.any(execute=True, exact=False))
2390 self.assertFalse(query2.any(execute=True, exact=True))
2391 self.assertEqual(query2.count(exact=False), 0)
2392 self.assertEqual(query2.count(exact=True), 0)
2393 self.assertFalse(list(query2.explain_no_results()))
2394 # These queries yield no results due to various problems that can be
2395 # spotted prior to execution, yielding helpful diagnostics.
2396 for query, snippets in [
2397 (
2398 # Dataset type name doesn't match any existing dataset types.
2399 registry.queryDatasets("nonexistent", collections=...),
2400 ["nonexistent"],
2401 ),
2402 (
2403 # Dataset type name doesn't match any existing dataset types.
2404 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2405 ["nonexistent"],
2406 ),
2407 (
2408 # Dataset type object isn't registered.
2409 registry.queryDatasets(
2410 DatasetType(
2411 "nonexistent",
2412 dimensions=["instrument"],
2413 universe=registry.dimensions,
2414 storageClass="Image",
2415 ),
2416 collections=...,
2417 ),
2418 ["nonexistent"],
2419 ),
2420 (
2421 # No datasets of this type in this collection.
2422 registry.queryDatasets("flat", collections=["biases"]),
2423 ["flat", "biases"],
2424 ),
2425 (
2426 # No collections matching at all.
2427 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2428 ["potato"],
2429 ),
2430 ]:
2432 self.assertFalse(query.any(execute=False, exact=False))
2433 self.assertFalse(query.any(execute=True, exact=False))
2434 self.assertFalse(query.any(execute=True, exact=True))
2435 self.assertEqual(query.count(exact=False), 0)
2436 self.assertEqual(query.count(exact=True), 0)
2437 messages = list(query.explain_no_results())
2438 self.assertTrue(messages)
2439 # Want all expected snippets to appear in at least one message.
2440 self.assertTrue(
2441 any(
2442 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2443 ),
2444 messages,
2445 )
2447 # These queries yield no results due to problems that can be identified
2448 # by cheap follow-up queries, yielding helpful diagnostics.
2449 for query, snippets in [
2450 (
2451 # No records for one of the involved dimensions.
2452 registry.queryDataIds(["subfilter"]),
2453 ["dimension records", "subfilter"],
2454 ),
2455 ]:
2456 self.assertFalse(query.any(execute=True, exact=False))
2457 self.assertFalse(query.any(execute=True, exact=True))
2458 self.assertEqual(query.count(exact=True), 0)
2459 messages = list(query.explain_no_results())
2460 self.assertTrue(messages)
2461 # Want all expected snippets to appear in at least one message.
2462 self.assertTrue(
2463 any(
2464 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2465 ),
2466 messages,
2467 )
2469 # This query yields four overlaps in the database, but one is filtered
2470 # out in postprocessing. The count queries aren't accurate because
2471 # they don't account for duplication that happens due to an internal
2472 # join against commonSkyPix.
2473 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2474 self.assertEqual(
2475 {
2476 DataCoordinate.standardize(
2477 instrument="Cam1",
2478 skymap="SkyMap1",
2479 visit=v,
2480 tract=t,
2481 universe=registry.dimensions,
2482 )
2483 for v, t in [(1, 0), (2, 0), (2, 1)]
2484 },
2485 set(query3),
2486 )
2487 self.assertTrue(query3.any(execute=False, exact=False))
2488 self.assertTrue(query3.any(execute=True, exact=False))
2489 self.assertTrue(query3.any(execute=True, exact=True))
2490 self.assertGreaterEqual(query3.count(exact=False), 4)
2491 self.assertGreaterEqual(query3.count(exact=True), 3)
2492 self.assertFalse(list(query3.explain_no_results()))
2493 # This query yields overlaps in the database, but all are filtered
2494 # out in postprocessing. The count queries again aren't very useful.
2495 # We have to use `where=` here to avoid an optimization that
2496 # (currently) skips the spatial postprocess-filtering because it
2497 # recognizes that no spatial join is necessary. That's not ideal, but
2498 # fixing it is out of scope for this ticket.
2499 query4 = registry.queryDataIds(
2500 ["visit", "tract"],
2501 instrument="Cam1",
2502 skymap="SkyMap1",
2503 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2504 )
2505 self.assertFalse(set(query4))
2506 self.assertTrue(query4.any(execute=False, exact=False))
2507 self.assertTrue(query4.any(execute=True, exact=False))
2508 self.assertFalse(query4.any(execute=True, exact=True))
2509 self.assertGreaterEqual(query4.count(exact=False), 1)
2510 self.assertEqual(query4.count(exact=True), 0)
2511 messages = list(query4.explain_no_results())
2512 self.assertTrue(messages)
2513 self.assertTrue(any("regions did not overlap" in message for message in messages))
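        # The any() checks above run cheapest-first; a minimal sketch of a
        # probe that classifies a query the same way (the helper is
        # hypothetical, not part of the query API):
        def probe(query):
            if not query.any(execute=False, exact=False):
                return "doomed before execution"
            if not query.any(execute=True, exact=False):
                return "no rows in the database"
            if not query.any(execute=True, exact=True):
                return "all rows removed by postprocessing"
            return "has results"

        self.assertEqual(probe(query4), "all rows removed by postprocessing")
        self.assertEqual(probe(query1), "has results")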
2515 def testQueryDataIdsOrderBy(self):
2516 """Test order_by and limit on result returned by queryDataIds()."""
2517 registry = self.makeRegistry()
2518 self.loadData(registry, "base.yaml")
2519 self.loadData(registry, "datasets.yaml")
2520 self.loadData(registry, "spatial.yaml")
2522 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2523 return registry.queryDataIds(
2524 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2525 )
2527 Test = namedtuple(
2528 "testQueryDataIdsOrderByTest",
2529 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2530 defaults=(None, None, None),
2531 )
2533 test_data = (
2534 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2535 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2536 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2537 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2538 Test(
2539 "tract.id,visit.id",
2540 "tract,visit",
2541 ((0, 1), (0, 1), (0, 2)),
2542 limit=(3,),
2543 ),
2544 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2545 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2546 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2547 Test(
2548 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2549 ),
2550 Test(
2551 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2552 ),
2553 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2554 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2555 Test(
2556 "tract,-timespan.begin,timespan.end",
2557 "tract,visit",
2558 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2559 ),
2560 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2561 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2562 Test(
2563 "tract,detector",
2564 "tract,detector",
2565 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2566 datasets="flat",
2567 collections="imported_r",
2568 ),
2569 Test(
2570 "tract,detector.full_name",
2571 "tract,detector",
2572 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2573 datasets="flat",
2574 collections="imported_r",
2575 ),
2576 Test(
2577 "tract,detector.raft,detector.name_in_raft",
2578 "tract,detector",
2579 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2580 datasets="flat",
2581 collections="imported_r",
2582 ),
2583 )
2585 for test in test_data:
2586 order_by = test.order_by.split(",")
2587 keys = test.keys.split(",")
2588 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2589 if test.limit is not None:
2590 query = query.limit(*test.limit)
2591 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2592 self.assertEqual(dataIds, test.result)
2594 # Repeat the same query with materialize(); results must match.
2595 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2596 if test.limit is not None:
2597 query = query.limit(*test.limit)
2598 with query.materialize() as materialized:
2599 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
2600 self.assertEqual(dataIds, test.result)
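# Hedged sketch of the limit(limit, offset) convention the rows above
# assume: the offset skips ordered rows before the limit applies, so
# an offset of 3 drops the first three (tract, visit) pairs.
sketch = do_query(("tract", "visit")).order_by("tract", "visit").limit(3, 3)
self.assertEqual(
tuple((dataId["tract"], dataId["visit"]) for dataId in sketch),
((0, 2), (1, 2), (1, 2)),
)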
2602 # Malformed or unknown names in order_by should raise ValueError.
2603 for order_by in ("", "-"):
2604 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2605 list(do_query().order_by(order_by))
2607 for order_by in ("undimension.name", "-undimension.name"):
2608 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2609 list(do_query().order_by(order_by))
2611 for order_by in ("attract", "-attract"):
2612 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2613 list(do_query().order_by(order_by))
2615 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2616 list(do_query(("exposure", "visit")).order_by("exposure_time"))
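# Hedged contrast: qualifying the ambiguous field with its element,
# as the rows above do, is accepted; the result is empty here only
# because this test data defines no exposures.
self.assertEqual(list(do_query(("exposure", "visit")).order_by("visit.exposure_time")), [])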
2618 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"):
2619 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2621 with self.assertRaisesRegex(
2622 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2623 ):
2624 list(do_query("tract").order_by("timespan.begin"))
2626 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2627 list(do_query("tract").order_by("tract.timespan.begin"))
2629 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2630 list(do_query("tract").order_by("tract.name"))
2632 def testQueryDimensionRecordsOrderBy(self):
2633 """Test order_by and limit on result returned by
2634 queryDimensionRecords().
2635 """
2636 registry = self.makeRegistry()
2637 self.loadData(registry, "base.yaml")
2638 self.loadData(registry, "datasets.yaml")
2639 self.loadData(registry, "spatial.yaml")
2641 def do_query(element, datasets=None, collections=None):
2642 return registry.queryDimensionRecords(
2643 element, instrument="Cam1", datasets=datasets, collections=collections
2644 )
2646 query = do_query("detector")
2647 self.assertEqual(len(list(query)), 4)
2649 Test = namedtuple(
2650 "testQueryDataIdsOrderByTest",
2651 ("element", "order_by", "result", "limit", "datasets", "collections"),
2652 defaults=(None, None, None),
2653 )
2655 test_data = (
2656 Test("detector", "detector", (1, 2, 3, 4)),
2657 Test("detector", "-detector", (4, 3, 2, 1)),
2658 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2659 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2660 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2661 Test("visit", "visit", (1, 2)),
2662 Test("visit", "-visit.id", (2, 1)),
2663 Test("visit", "zenith_angle", (1, 2)),
2664 Test("visit", "-visit.name", (2, 1)),
2665 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2666 )
2668 for test in test_data:
2669 order_by = test.order_by.split(",")
2670 query = do_query(test.element).order_by(*order_by)
2671 if test.limit is not None:
2672 query = query.limit(*test.limit)
2673 recordIds = tuple(rec.id for rec in query)
2674 self.assertEqual(recordIds, test.result)
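# Hedged spot check mirroring the raft ordering row above: order_by
# chains with limit for dimension records as well.
sketch = do_query("detector").order_by("raft", "-name_in_raft").limit(2)
self.assertEqual(tuple(rec.id for rec in sketch), (2, 1))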
2676 # Unknown element or field names in order_by should raise ValueError.
2677 for order_by in ("", "-"):
2678 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2679 list(do_query("detector").order_by(order_by))
2681 for order_by in ("undimension.name", "-undimension.name"):
2682 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
2683 list(do_query("detector").order_by(order_by))
2685 for order_by in ("attract", "-attract"):
2686 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
2687 list(do_query("detector").order_by(order_by))
2689 def testDatasetConstrainedDimensionRecordQueries(self):
2690 """Test that queryDimensionRecords works even when given a dataset
2691 constraint whose dimensions extend beyond the requested dimension
2692 element's.
2693 """
2694 registry = self.makeRegistry()
2695 self.loadData(registry, "base.yaml")
2696 self.loadData(registry, "datasets.yaml")
2697 # Query for physical_filter dimension records, using a dataset type
2698 # (flat) whose dimensions include physical_filter as well as detector.
2699 records = registry.queryDimensionRecords(
2700 "physical_filter",
2701 datasets=["flat"],
2702 collections="imported_r",
2703 )
2704 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
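# Hedged companion check, grounded in the order_by tests above (which
# show imported_r flats for detectors 1-4): the same dataset
# constraint also works when the requested element is detector.
records = registry.queryDimensionRecords(
"detector",
datasets=["flat"],
collections="imported_r",
)
self.assertEqual({record.id for record in records}, {1, 2, 3, 4})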