# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
import itertools
import logging
import os
import re
from typing import Iterator, Optional, Type, Union, TYPE_CHECKING
import unittest
import uuid

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value for
    this member it overrides the name specified in the default configuration
    (`str`).
    """
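
    # A subclass usually only needs to override the two members above when it
    # tests a non-default manager.  A minimal sketch (the manager path below
    # is an illustrative assumption based on the name checked in
    # `testImportDatasetsUUID`, not a recommendation):
    #
    #     class UUIDRegistryTests(RegistryTests):
    #         datasetsManager = (
    #             "lsst.daf.butler.registry.datasets.byDimensions"
    #             ".ByDimensionsDatasetRecordStorageManagerUUID"
    #         )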

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
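
    # A concrete subclass might back `makeRegistry` with an in-memory SQLite
    # database.  A rough sketch, assuming a `Registry.createFromConfig`
    # factory and the "db" connection-string key (both may differ between
    # daf_butler versions):
    #
    #     def makeRegistry(self) -> Registry:
    #         config = self.makeRegistryConfig()
    #         config["db"] = "sqlite://"  # in-memory SQLite
    #         return Registry.createFromConfig(config)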

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
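
    # Iteration, `count()`, and `any()` can each be backed by a different
    # query in a lazy results object, so the helper above exercises all three
    # rather than trusting a single code path (a rationale inferred from the
    # lazy-evaluation note in the docstring, not stated in the original).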

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(rows[0:2], list(registry.fetchOpaqueData(
            table,
            id=list(range(1000)) + list(range(100, 0, -1)),
            name=["one"] + [f"q{i}" for i in range(2200)] + ["two"])))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existent dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs.
        """
        if not (self.datasetsManager
                and self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID")):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        ref1, = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique ID generation modes; such datasets can be
        # re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                ref1, = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
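
                # Version 5 marks a name-based (SHA-1) UUID: DATAID_TYPE
                # presumably hashes the dataset type and data ID, while
                # DATAID_TYPE_RUN also mixes in the run name, so re-importing
                # the same inputs reproduces the same ID (an interpretation of
                # the enum names; the code only asserts the UUID version).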

                # Importing it again is OK
                ref2, = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import into a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        ref2, = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    ref2, = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset IDs.
        """
        if not (self.datasetsManager
                and self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager")):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        ref1, = registry._importDatasets([ref])
        # Should make a new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting the same dataId with a different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting a different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        ref4, = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        ref4, = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2}
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't cause a
        # conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should also be found when
        # searching chain2, which includes run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 should not, because its insertion was rolled back; nor
        # should the never-inserted Cam3.
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
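        # `savepoint=True` below presumably maps to a SQL SAVEPOINT, letting
        # the inner block roll back on its own without aborting the outer
        # transaction (an assumption about the implementation; the assertions
        # below only check the observable behavior).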
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with an empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with both input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # a more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # an expression that excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the requested
        # dimensions but is part of the full expression, should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, with no joins to
        instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with an empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # An expression that excludes everything: specifying a non-existent
        # skymap is not a fatal error, just an operator error that matches
        # nothing.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
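        # `envelope` on a sphgeom pixelization yields half-open index ranges
        # covering a region, so the expected set below is the brute-force
        # union of every commonSkyPix index touching each element's region
        # (a reading of the sphgeom API as used here, not documented in this
        # file).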
        for elementName, regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # Check what get() returns for a non-existing key.
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Cannot store an empty key or value.
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # Set the value of a non-existing key.
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # Update the value of an existing key.
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")
        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # Delete an existing key.
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Delete a non-existing key.
        self.assertFalse(attributes.delete("non-attr"))

        # Store a bunch of keys and get the list back.
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
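        # findFirst=True means: for each data ID, keep only the dataset from
        # the first collection in the sequence that contains one.  A hedged
        # sketch of equivalent post-processing over an unordered result:
        #
        #     first = {}
        #     for collection in ["imported_g", "imported_r"]:
        #         for ref in registry.queryDatasets("bias", collections=collection):
        #             first.setdefault(ref.dataId, ref)
        #
        # The assertions below check exactly this, in both collection orders.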
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
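        # The chaining pattern under test, sketched with the names used below:
        #
        #     dataIds = registry.queryDataIds([...])        # lazy query
        #     subset = dataIds.subset(graph, unique=True)   # drop dimensions
        #     refs = subset.findDatasets(datasetType, collections=[...])
        #
        # Each stage can also be materialize()d into a temporary table, which
        # must not change any of the results.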
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
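        # With no dimensions there is exactly one conceivable data ID, the
        # empty one; a sketch of the invariant the assertions below rely on:
        #
        #     empty = DataCoordinate.makeEmpty(registry.dimensions)
        #     assert set(registry.queryDataIds([])) == {empty}
        #
        # (Illustrative only; the test constructs the same value as
        # ``dataId`` below.)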
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both datasets, then for each one at a time.
        self.checkQueryResults(
            registry.queryDatasets(schema, collections=[run1, run2], findFirst=False),
            [dataset1, dataset2]
        )
        self.checkQueryResults(
            registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.checkQueryResults(dataIds, [dataId])
        # Use queried data IDs to find the datasets.
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
            [dataset1, dataset2],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.checkQueryResults(dataIds, [dataId])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                [dataset1],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.checkQueryResults(dataIds, [dataId])
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
            [dataset1, dataset2],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
            [dataset1],
        )
        self.checkQueryResults(
            dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.checkQueryResults(dataIds, [dataId])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
                [dataset1, dataset2],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                [dataset1],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.checkQueryResults(dataIds, [dataId])
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
                [dataset1, dataset2],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                [dataset1],
            )
            self.checkQueryResults(
                dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.checkQueryResults(
                    dataIds,
                    [dataId]
                )
                self.checkQueryResults(
                    dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
                    [dataset1, dataset2],
                )
                self.checkQueryResults(
                    dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
                    [dataset1],
                )
                self.checkQueryResults(
                    dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
                    [dataset2],
                )

    def testDimensionDataModifications(self):
        """Test that modifying dimension records via
        ``syncDimensionData(..., update=True)`` and
        ``insertDimensionData(..., replace=True)`` works as expected, even in
        the presence of datasets using those dimensions and spatial overlap
        relationships.
        """
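        # The two modification paths exercised here, sketched:
        #
        #     # Returns a dict of the *old* values for any fields changed:
        #     old = registry.syncDimensionData("tract", record, update=True)
        #
        #     # Overwrites matching records in place (it must not behave as
        #     # delete-then-insert, or foreign keys from datasets would
        #     # break):
        #     registry.insertDimensionData("patch", *records, replace=True)
        #
        # ``record``/``records`` stand in for the dicts built below.
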
        def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
            """Unpack a sphgeom.RangeSet into the integers it contains.
            """
            for begin, end in ranges:
                yield from range(begin, end)

        def range_set_hull(
            ranges: lsst.sphgeom.RangeSet,
            pixelization: lsst.sphgeom.HtmPixelization,
        ) -> lsst.sphgeom.ConvexPolygon:
            """Create a ConvexPolygon hull of the region defined by a set of
            HTM pixelization index ranges.
            """
            points = []
            for index in unpack_range_set(ranges):
                points.extend(pixelization.triangle(index).getVertices())
            return lsst.sphgeom.ConvexPolygon(points)

        # Use HTM to set up an initial parent region (one arbitrary trixel)
        # and four child regions (the trixels within the parent at the next
        # level).  We'll use the parent as a tract/visit region and the
        # children as its patch/visit_detector regions.
        registry = self.makeRegistry()
        htm6 = registry.dimensions.skypix["htm"][6].pixelization
        commonSkyPix = registry.dimensions.commonSkyPix.pixelization
        index = 12288
        child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
        assert htm6.universe().contains(child_ranges_small)
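        # scaled(4) maps the index range down one HTM level: each trixel has
        # exactly four children, so [i, i + 1) becomes [4*i, 4*i + 4), the
        # four child trixels of trixel ``index``.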
        child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
        parent_region_small = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
        )
        assert all(parent_region_small.contains(c) for c in child_regions_small)
        # Make a larger version of each child region, defined to be the set of
        # htm6 trixels that overlap the original's bounding circle.  Make a
        # new parent that's the convex hull of the new children.
        child_regions_large = [
            range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6)
            for c in child_regions_small
        ]
        assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
        parent_region_large = lsst.sphgeom.ConvexPolygon(
            list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
        )
        assert all(parent_region_large.contains(c) for c in child_regions_large)
        assert parent_region_large.contains(parent_region_small)
        assert not parent_region_small.contains(parent_region_large)
        assert not all(parent_region_small.contains(c) for c in child_regions_large)
        # Find some commonSkyPix indices that overlap the large regions but do
        # not overlap the small regions.  We use commonSkyPix here to make
        # sure the real tests later involve what's in the database, not just
        # post-query region filtering.
        child_difference_indices = []
        for large, small in zip(child_regions_large, child_regions_small):
            difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
            assert difference, "if this is empty, we can't test anything useful with these regions"
            assert all(
                not commonSkyPix.triangle(d).isDisjointFrom(large)
                and commonSkyPix.triangle(d).isDisjointFrom(small)
                for d in difference
            )
            child_difference_indices.append(difference)
        parent_difference_indices = list(
            unpack_range_set(
                commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
            )
        )
        assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
        assert all(
            (
                not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
                and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
            )
            for d in parent_difference_indices
        )
        # Now that we've finally got those regions, we'll insert the large
        # ones as tract/patch dimension records.
        skymap_name = "testing_v1"
        registry.insertDimensionData(
            "skymap", {
                "name": skymap_name,
                "hash": bytes([42]),
                "tract_max": 1,
                "patch_nx_max": 2,
                "patch_ny_max": 2,
            }
        )
        registry.insertDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large}
        )
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_large)]
        )
        # Add a dataset that uses these dimensions to make sure that modifying
        # them doesn't disrupt foreign keys (we need to make sure the DB
        # doesn't implement insert with replace=True as delete-then-insert).
        dataset_type = DatasetType(
            "coadd",
            dimensions=["tract", "patch"],
            universe=registry.dimensions,
            storageClass="Exposure",
        )
        registry.registerDatasetType(dataset_type)
        registry.registerCollection("the_run", CollectionType.RUN)
        registry.insertDatasets(
            dataset_type,
            [{"skymap": skymap_name, "tract": 0, "patch": 2}],
            run="the_run",
        )
        # Query for tracts and patches that overlap some "difference"
        # commonSkyPix pixels; there should be overlaps, because the database
        # has the "large" suite of regions.
        self.assertEqual(
            {0},
            {
                data_id["tract"] for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            }
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
        # Use sync to update the tract region and insert to update the patch
        # regions, moving to the "small" suite.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_small},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_large})
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_small)],
            replace=True
        )
        # Query again; now there should be no such overlaps, because the
        # database has the "small" suite of regions.
        self.assertFalse(
            set(
                registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            )
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertNotIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )
        # Update back to the large regions and query one more time.
        updated = registry.syncDimensionData(
            "tract",
            {"skymap": skymap_name, "id": 0, "region": parent_region_large},
            update=True,
        )
        self.assertEqual(updated, {"region": parent_region_small})
        registry.insertDimensionData(
            "patch",
            *[{
                "skymap": skymap_name,
                "tract": 0,
                "id": n,
                "cell_x": n % 2,
                "cell_y": n // 2,
                "region": c
            } for n, c in enumerate(child_regions_large)],
            replace=True
        )
        self.assertEqual(
            {0},
            {
                data_id["tract"] for data_id in registry.queryDataIds(
                    ["tract"],
                    skymap=skymap_name,
                    dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
                )
            }
        )
        for patch_id, patch_difference_indices in enumerate(child_difference_indices):
            self.assertIn(
                patch_id,
                {
                    data_id["patch"] for data_id in registry.queryDataIds(
                        ["patch"],
                        skymap=skymap_name,
                        dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
                    )
                }
            )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
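        # The calibration-collection API exercised below, sketched:
        #
        #     registry.certify(collection, refs, Timespan(begin, end))
        #     registry.findDataset("bias", ..., collections=collection,
        #                          timespan=lookup_timespan)
        #     registry.decertify(collection, "bias", timespan, dataIds=...)
        #
        # certify associates datasets with a validity range; findDataset with
        # a timespan returns the unique dataset whose range overlaps it (and
        # raises RuntimeError when more than one does); decertify removes or
        # truncates validity ranges, optionally only for the given data IDs.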
        # Set up: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        # (None appears at both ends so combinations() also yields timespans
        # that are unbounded below and/or above.)
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and do the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs; then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2a over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testSkipCalibs(self):
        """Test how queries handle skipping of calibration collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        coll_calib = "Cam1/calibs/default"
        registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)

        # Add all biases to the calibration collection.  Without this, the
        # logic that prunes dataset subqueries based on datasetType-collection
        # summary information would fire before the logic we want to test
        # below.  That pruning is a good thing (it avoids the dreaded
        # NotImplementedError a bit more often) everywhere but here.
        registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))

        coll_list = [coll_calib, "imported_g", "imported_r"]
        chain = "Cam1/chain"
        registry.registerCollection(chain, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain, coll_list)

        # An explicit collection list will raise if findFirst=True or if
        # there are temporal dimensions.
        with self.assertRaises(NotImplementedError):
            registry.queryDatasets("bias", collections=coll_list, findFirst=True)
        with self.assertRaises(NotImplementedError):
            registry.queryDataIds(["instrument", "detector", "exposure"], datasets="bias",
                                  collections=coll_list).count()

        # A chained collection will skip the calibration collection instead.
        datasets = list(registry.queryDatasets("bias", collections=chain))
        self.assertGreater(len(datasets), 0)

        dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias",
                                             collections=chain))
        self.assertGreater(len(dataIds), 0)

        # A glob pattern will skip it too.
        datasets = list(registry.queryDatasets("bias", collections="*d*"))
        self.assertGreater(len(datasets), 0)

        # A regular expression will skip it too.
        pattern = re.compile(".*")
        datasets = list(registry.queryDatasets("bias", collections=pattern))
        self.assertGreater(len(datasets), 0)

        # Ellipsis should work as usual.
        datasets = list(registry.queryDatasets("bias", collections=...))
        self.assertGreater(len(datasets), 0)

        # A few tests with findFirst.
        datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
        self.assertGreater(len(datasets), 0)

    def testIngestTimeQuery(self):
        """Test that dataset ingest_date can be used in `where` expressions
        for queryDatasets.
        """
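        # Query expressions accept time literals of the form T'...' as well
        # as values passed via ``bind``; a sketch of the pattern used below
        # (the date string is illustrative only):
        #
        #     where = "ingest_date > T'2020-01-01 12:00:00'"
        #     refs = list(registry.queryDatasets(..., collections=..., where=where))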
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        dt0 = datetime.utcnow()
        self.loadData(registry, "datasets.yaml")
        dt1 = datetime.utcnow()

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # No one will ever use this piece of software in 30 years.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

        # Check more exact timing to make sure there is no 37-second offset
        # (after fixing DM-30124).  SQLite time precision is 1 second, so make
        # sure that we don't test with higher precision.
        tests = [
            # Format: (timestamp, operator, expected_len).
            (dt0 - timedelta(seconds=1), ">", len0),
            (dt0 - timedelta(seconds=1), "<", 0),
            (dt1 + timedelta(seconds=1), "<", len0),
            (dt1 + timedelta(seconds=1), ">", 0),
        ]
        for dt, op, expect_len in tests:
            dt_str = dt.isoformat(sep=" ")

            where = f"ingest_date {op} T'{dt_str}'"
            datasets = list(registry.queryDatasets(..., collections=..., where=where))
            self.assertEqual(len(datasets), expect_len)

            # The same, with bind using a datetime or astropy Time.
            where = f"ingest_date {op} ingest_time"
            datasets = list(registry.queryDatasets(..., collections=..., where=where,
                                                   bind={"ingest_time": dt}))
            self.assertEqual(len(datasets), expect_len)

            dt_astropy = astropy.time.Time(dt, format="datetime")
            datasets = list(registry.queryDatasets(..., collections=..., where=where,
                                                   bind={"ingest_time": dt_astropy}))
            self.assertEqual(len(datasets), expect_len)

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }
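
        # ``bind`` maps identifiers used in the ``where`` strings below to
        # Python values, so expressions can reference t1..t4 and ts23 without
        # splicing formatted time literals into the query text.
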
        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
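        # A CollectionSummary records, roughly, which dataset types and which
        # governor dimension values (e.g. instrument names) can appear in a
        # collection; the accessors exercised below, sketched:
        #
        #     summary = registry.getCollectionSummary("imported_g")
        #     summary.datasetTypes   # set-like container of DatasetType
        #     summary.dimensions     # governor dimension values seen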
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from yaml should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate flats only into a tagged collection and a calibration
        # collection to check summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)

    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions
        that are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )

    def testBindInQueryDatasets(self):
        """Test that the bind parameter is correctly forwarded in
        queryDatasets recursion.
        """
        registry = self.makeRegistry()
        # Load some datasets to query.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertEqual(
            set(registry.queryDatasets("flat", band="r", collections=...)),
            set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
        )

    def testQueryResultSummaries(self):
        """Test summary methods like `count`, `any`, and `explain_no_results`
        on `DataCoordinateQueryResults` and `DatasetQueryResults`.
        """
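        # The summary API under test, sketched:
        #
        #     results.any(execute=False, exact=False)  # cheap static check
        #     results.any(execute=True, exact=True)    # run query + filtering
        #     results.count(exact=False)               # may overcount
        #     results.explain_no_results()             # iterable of messages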
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")
        # The default test dataset has two collections, each with both flats
        # and biases.  Add a new collection with only biases.
        registry.registerCollection("biases", CollectionType.TAGGED)
        registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
        # The first query yields two results and involves no postprocessing.
        query1 = registry.queryDataIds(["physical_filter"], band="r")
        self.assertTrue(query1.any(execute=False, exact=False))
        self.assertTrue(query1.any(execute=True, exact=False))
        self.assertTrue(query1.any(execute=True, exact=True))
        self.assertEqual(query1.count(exact=False), 2)
        self.assertEqual(query1.count(exact=True), 2)
        self.assertFalse(list(query1.explain_no_results()))
        # The second query should yield no results, but this isn't detectable
        # unless we actually run a query.
        query2 = registry.queryDataIds(["physical_filter"], band="h")
        self.assertTrue(query2.any(execute=False, exact=False))
        self.assertFalse(query2.any(execute=True, exact=False))
        self.assertFalse(query2.any(execute=True, exact=True))
        self.assertEqual(query2.count(exact=False), 0)
        self.assertEqual(query2.count(exact=True), 0)
        self.assertFalse(list(query2.explain_no_results()))
        # These queries yield no results due to various problems that can be
        # spotted prior to execution, yielding helpful diagnostics.
        for query, snippets in [
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDatasets("nonexistent", collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type name doesn't match any existing dataset types.
                registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
                ["nonexistent"],
            ),
            (
                # Dataset type object isn't registered.
                registry.queryDatasets(
                    DatasetType(
                        "nonexistent",
                        dimensions=["instrument"],
                        universe=registry.dimensions,
                        storageClass="Image",
                    ),
                    collections=...
                ),
                ["nonexistent"],
            ),
            (
                # No datasets of this type in this collection.
                registry.queryDatasets("flat", collections=["biases"]),
                ["flat", "biases"],
            ),
            (
                # No collections matching at all.
                registry.queryDatasets("flat", collections=re.compile("potato.+")),
                ["potato"],
            ),
        ]:

            self.assertFalse(query.any(execute=False, exact=False))
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=False), 0)
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # We want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets)
                    for message in messages
                ),
                messages
            )

        # These queries yield no results due to problems that can be
        # identified by cheap follow-up queries, yielding helpful diagnostics.
        for query, snippets in [
            (
                # No records for one of the involved dimensions.
                registry.queryDataIds(["subfilter"]),
                ["dimension records", "subfilter"],
            ),
        ]:
            self.assertFalse(query.any(execute=True, exact=False))
            self.assertFalse(query.any(execute=True, exact=True))
            self.assertEqual(query.count(exact=True), 0)
            messages = list(query.explain_no_results())
            self.assertTrue(messages)
            # We want all expected snippets to appear in at least one message.
            self.assertTrue(
                any(
                    all(snippet in message for snippet in snippets)
                    for message in messages
                ),
                messages
            )

        # This query yields four overlaps in the database, but one is filtered
        # out in postprocessing.  The count queries aren't accurate, because
        # they don't account for duplication that happens due to an internal
        # join against commonSkyPix.
        query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
        self.assertEqual(
            {
                DataCoordinate.standardize(
                    instrument="Cam1",
                    skymap="SkyMap1",
                    visit=v,
                    tract=t,
                    universe=registry.dimensions,
                )
                for v, t in [(1, 0), (2, 0), (2, 1)]
            },
            set(query3),
        )
        self.assertTrue(query3.any(execute=False, exact=False))
        self.assertTrue(query3.any(execute=True, exact=False))
        self.assertTrue(query3.any(execute=True, exact=True))
        self.assertGreaterEqual(query3.count(exact=False), 4)
        self.assertGreaterEqual(query3.count(exact=True), 3)
        self.assertFalse(list(query3.explain_no_results()))
        # This query yields overlaps in the database, but all are filtered
        # out in postprocessing, so the count queries again aren't very
        # useful.  We have to use `where=` here to avoid an optimization that
        # (currently) skips the spatial postprocess-filtering because it
        # recognizes that no spatial join is necessary.  That's not ideal, but
        # fixing it is out of scope for this ticket.
        query4 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1",
                                       where="visit=1 AND detector=1 AND tract=0 AND patch=4")
        self.assertFalse(set(query4))
        self.assertTrue(query4.any(execute=False, exact=False))
        self.assertTrue(query4.any(execute=True, exact=False))
        self.assertFalse(query4.any(execute=True, exact=True))
        self.assertGreaterEqual(query4.count(exact=False), 1)
        self.assertEqual(query4.count(exact=True), 0)
        messages = list(query4.explain_no_results())
        self.assertTrue(messages)
        self.assertTrue(
            any(
                "regions did not overlap" in message
                for message in messages
            )
        )

    def testQueryDataIdsOrderBy(self):
        """Test order_by and limit on result returned by queryDataIds().
        """
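        # The ordering API under test, sketched:
        #
        #     results = registry.queryDataIds(dims).order_by("tract", "-visit")
        #     results = results.limit(3, 2)
        #
        # A leading "-" means descending, and "element.field" selects a
        # metadata field; limit() takes a row limit and, per the expected
        # results below, an optional offset.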
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
            return registry.queryDataIds(dimensions, datasets=datasets, collections=collections,
                                         instrument="Cam1", skymap="SkyMap1")

        Test = namedtuple(
            "testQueryDataIdsOrderByTest",
            ("order_by", "keys", "result", "limit", "datasets", "collections"),
            defaults=(None, None, None),
        )

        # Each test case defines:
        # - order_by column names, comma-separated
        # - the data ID keys to extract, comma-separated
        # - the tuple of result values we expect
        # - optionally, a limit tuple and a dataset type with collections
        test_data = (
            Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
            Test("tract.id,visit.id", "tract,visit", ((0, 1), (0, 1), (0, 2)), limit=(3,)),
            Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
            Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
            Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
            Test("tract,visit.exposure_time", "tract,visit",
                 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("-tract,-visit.exposure_time", "tract,visit",
                 ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
            Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
            Test("tract,-timespan.begin,timespan.end", "tract,visit",
                 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
            Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
            Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
            Test("tract,detector", "tract,detector",
                 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                 datasets="flat", collections="imported_r"),
            Test("tract,detector.full_name", "tract,detector",
                 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                 datasets="flat", collections="imported_r"),
            Test("tract,detector.raft,detector.name_in_raft", "tract,detector",
                 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
                 datasets="flat", collections="imported_r"),
        )

        for test in test_data:
            order_by = test.order_by.split(",")
            keys = test.keys.split(",")
            query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
            self.assertEqual(dataIds, test.result)

            # Repeat the same checks on a materialized query to verify that
            # order_by and limit also work after materialization.
            query = do_query(keys).order_by(*order_by)
            if test.limit is not None:
                query = query.limit(*test.limit)
            with query.materialize() as materialized:
                dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
                self.assertEqual(dataIds, test.result)
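
        # Summary of the order_by term syntax exercised above (as inferred
        # from these cases): an optional leading "-" selects descending
        # order, and each term is a dimension name ("visit"), an element
        # metadata field ("visit.name"), or a timespan bound
        # ("timespan.begin" / "timespan.end").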

        # Malformed or ambiguous ORDER BY expressions must raise ValueError.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

        with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("exposure_time"))

        with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimension"):
            list(do_query(("exposure", "visit")).order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError,
                                    "Cannot find any temporal dimension element for 'timespan.begin'"):
            list(do_query(("tract",)).order_by("timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
            list(do_query(("tract",)).order_by("tract.timespan.begin"))

        with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
            list(do_query(("tract",)).order_by("tract.name"))

    def testQueryDimensionRecordsOrderBy(self):
        """Test order_by and limit on result returned by
        queryDimensionRecords().
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.loadData(registry, "spatial.yaml")

        def do_query():
            return registry.queryDimensionRecords("detector", instrument="Cam1")

        query = do_query()
        self.assertEqual(len(list(query)), 4)

        # For each test three items are defined here:
        # - order_by column names, comma-separated
        # - limit tuple or None
        # - tuple of the expected detector IDs
        test_data = (
            ("detector", None, (1, 2, 3, 4)),
            ("-detector", None, (4, 3, 2, 1)),
            ("raft,-name_in_raft", None, (2, 1, 4, 3)),
            ("-detector.purpose", (1,), (4,)),
            ("-purpose,detector.raft,name_in_raft", (2, 2), (2, 3)),
        )

        for order_by, limit, expected in test_data:
            order_by = order_by.split(",")
            query = do_query().order_by(*order_by)
            if limit is not None:
                query = query.limit(*limit)
            dataIds = tuple(rec.id for rec in query)
            self.assertEqual(dataIds, expected)
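
        # Sketch of the equivalent direct call (values illustrative, not
        # part of the original test):
        #
        #     registry.queryDimensionRecords(
        #         "detector", instrument="Cam1"
        #     ).order_by("raft", "-name_in_raft").limit(2)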

        # Malformed ORDER BY expressions must raise ValueError here as well.
        for order_by in ("", "-"):
            with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
                list(do_query().order_by(order_by))

        for order_by in ("undimension.name", "-undimension.name"):
            with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
                list(do_query().order_by(order_by))

        for order_by in ("attract", "-attract"):
            with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
                list(do_query().order_by(order_by))

    def testDatasetConstrainedDimensionRecordQueries(self):
        """Test that queryDimensionRecords works even when given a dataset
        constraint whose dimensions extend beyond the requested dimension
        element's.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Query for physical_filter dimension records, using a dataset that
        # has both physical_filter and detector dimensions.
        records = registry.queryDimensionRecords(
            "physical_filter",
            datasets=["flat"],
            collections="imported_r",
        )
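        # Only the two R-band filters have flat datasets in imported_r, so
        # only their records should come back.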
        self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})