Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
import itertools
import logging
import os
import re
from typing import Iterator, Optional, Type, Union, TYPE_CHECKING
import unittest
import uuid

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
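
    # Illustrative only, not part of the original file: a concrete subclass
    # would typically combine `makeRegistryConfig` with a backend-specific
    # database URI.  The "db" key and `Registry.createFromConfig` usage here
    # are assumptions modeled on other daf_butler test helpers:
    #
    #     class SqliteMemoryRegistryTests(RegistryTests, unittest.TestCase):
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # in-memory SQLite database
    #             return Registry.createFromConfig(config)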

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should
            be equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
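
    # Example usage (illustrative only): compare a lazy query results object
    # against explicitly constructed expectations.  ``ref1`` and ``ref2``
    # below are hypothetical resolved `DatasetRef` instances:
    #
    #     results = registry.queryDatasets("bias", collections=["imported_g"])
    #     self.checkQueryResults(results, expected=[ref1, ref2])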

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, the second has matching elements in different batches
        # (after sorting).
        self.assertEqual(rows[0:2], list(registry.fetchOpaqueData(
            table,
            id=list(range(1000)) + list(range(100, 0, -1)),
            name=["one"] + [f"q{i}" for i in range(2200)] + ["two"])))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))
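
    # Illustrative sketch, not part of the original tests: how an IN clause
    # with more values than the database's parameter limit can be processed
    # in fixed-size batches.  The 1k batch size mirrors the comment in
    # testOpaque above; the helper itself is hypothetical, not daf_butler
    # API.
    @staticmethod
    def _batchedValues(values, batchSize=1000):
        """Yield ``values`` sorted and split into lists of at most
        ``batchSize`` elements, one list per database query.
        """
        values = sorted(values)
        for start in range(0, len(values), batchSize):
            yield values[start:start + batchSize]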

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are
        datasets of that type present or if the dataset type is for a
        component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs.
        """
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        ref1, = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is
            # an error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                ref1, = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                ref2, = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        ref2, = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    ref2, = registry._importDatasets([ref], idGenerationMode=idGenMode)
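
    # Illustrative sketch, not part of the original tests: DATAID_TYPE and
    # DATAID_TYPE_RUN modes derive deterministic version-5 UUIDs, which is
    # why the test above asserts ``ref1.id.version == 5``.  The namespace
    # and name strings below are assumptions for illustration, not
    # daf_butler's actual inputs:
    #
    #     namespace = uuid.UUID(int=0)  # fixed in the real implementation
    #     name = f"{datasetTypeBias.name}/{dataIdBias1}"   # DATAID_TYPE
    #     nameWithRun = f"{name}/run2"                     # DATAID_TYPE_RUN
    #     deterministicId = uuid.uuid5(namespace, name)
    #     assert deterministicId.version == 5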

    def testImportDatasetsInt(self):
        """Test for `Registry._importDatasets` with integer dataset IDs.
        """
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
        dataset_id = 999999999

        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
        ref1, = registry._importDatasets([ref])
        # Should make a new integer ID.
        self.assertNotEqual(ref1.id, ref.id)

        # Ingesting the same dataId with a different dataset ID is an error
        ref2 = ref1.unresolved().resolved(dataset_id, run=run)
        with self.assertRaises(ConflictingDefinitionError):
            registry._importDatasets([ref2])

        # Ingesting a different dataId with the same dataset ID should work
        ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
        ref4, = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

        ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
        ref4, = registry._importDatasets([ref3])
        self.assertNotEqual(ref4.id, ref1.id)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also
        # return that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should
        # include at least all non-component dataset types (and I don't want
        # to enumerate all of the Exposure components for bias and flat
        # here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but
        # that should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should also be found in
        # chain2, since run2 is the first child of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
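
    # Illustrative sketch, not part of the original tests, of the
    # flatten=True semantics exercised above: the assigned children are
    # recursively expanded so that the stored chain contains only
    # non-CHAINED collections.  This helper is hypothetical, not a
    # daf_butler API.
    def _flattenChainForIllustration(self, registry: Registry, children) -> Iterator[str]:
        for child in children:
            if registry.getCollectionType(child) is CollectionType.CHAINED:
                yield from self._flattenChainForIllustration(
                    registry, registry.getCollectionChain(child)
                )
            else:
                yield child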

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not of the original insertion in the
                    # outer block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the two collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(TypeError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression that excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions but
        # is a part of the full expression, should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # An expression that excludes everything: specifying a non-existent
        # skymap is not a fatal error, just operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just
        # be a reasonable change to the default dimension definitions - but
        # the test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already
                # fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, theseRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in theseRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # Check what get() returns for a non-existing key.
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Cannot store an empty key or value.
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # Set the value of a non-existing key.
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # Update the value of an existing key.
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # Delete an existing key.
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Delete a non-existing key.
        self.assertFalse(attributes.delete("non-attr"))

        # Store a bunch of keys and get the list back.
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)
1260 def testQueryDatasetsDeduplication(self):
1261 """Test that the findFirst option to queryDatasets selects datasets
1262 from collections in the order given".
1263 """
1264 registry = self.makeRegistry()
1265 self.loadData(registry, "base.yaml")
1266 self.loadData(registry, "datasets.yaml")
1267 self.assertCountEqual(
1268 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1269 [
1270 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1271 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1272 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1273 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1274 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1275 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1276 ]
1277 )
1278 self.assertCountEqual(
1279 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
1280 findFirst=True)),
1281 [
1282 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1283 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1284 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1285 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1286 ]
1287 )
1288 self.assertCountEqual(
1289 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
1290 findFirst=True)),
1291 [
1292 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1293 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1294 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1295 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1296 ]
1297 )
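The rule those three assertions encode is simple: with findFirst=True, each data ID takes its dataset from the first collection in the search order that has one. A pure-Python sketch with illustrative names only:

def find_first(per_collection, collections):
    # Earlier collections win; setdefault keeps the first match per data ID.
    chosen = {}
    for collection in collections:
        for data_id, ref in per_collection.get(collection, {}).items():
            chosen.setdefault(data_id, ref)
    return chosen

per_collection = {
    "imported_g": {1: "bias@g", 2: "bias@g", 3: "bias@g"},
    "imported_r": {2: "bias@r", 3: "bias@r", 4: "bias@r"},
}
assert find_first(per_collection, ["imported_g", "imported_r"])[2] == "bias@g"
assert find_first(per_collection, ["imported_r", "imported_g"])[2] == "bias@r"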
1299 def testQueryResults(self):
1300 """Test querying for data IDs and then manipulating the QueryResults
1301 object returned to perform other queries.
1302 """
1303 registry = self.makeRegistry()
1304 self.loadData(registry, "base.yaml")
1305 self.loadData(registry, "datasets.yaml")
1306 bias = registry.getDatasetType("bias")
1307 flat = registry.getDatasetType("flat")
1308 # Obtain expected results from methods other than those we're testing
1309 # here. That includes:
1310 # - the dimensions of the data IDs we want to query:
1311 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1312 # - the dimensions of some other data IDs we'll extract from that:
1313 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1314 # - the data IDs we expect to obtain from the first queries:
1315 expectedDataIds = DataCoordinateSet(
1316 {
1317 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
1318 universe=registry.dimensions)
1319 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1320 },
1321 graph=expectedGraph,
1322 hasFull=False,
1323 hasRecords=False,
1324 )
1325 # - the flat datasets we expect to find from those data IDs, in just
1326 # one collection (so deduplication is irrelevant):
1327 expectedFlats = [
1328 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
1329 collections="imported_r"),
1330 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
1331 collections="imported_r"),
1332 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
1333 collections="imported_r"),
1334 ]
1335 # - the data IDs we expect to extract from that:
1336 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1337 # - the bias datasets we expect to find from those data IDs, after we
1338 # subset out the physical_filter dimension, first with duplicates:
1339 expectedAllBiases = [
1340 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1341 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1342 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1343 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1344 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1345 ]
1346 # - ...and without duplicates:
1347 expectedDeduplicatedBiases = [
1348 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1349 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1350 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1351 ]
1352 # Test against those expected results, using a "lazy" query for the
1353 # data IDs (which re-executes that query each time we use it to do
1354 # something new).
1355 dataIds = registry.queryDataIds(
1356 ["detector", "physical_filter"],
1357 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1358 instrument="Cam1",
1359 )
1360 self.assertEqual(dataIds.graph, expectedGraph)
1361 self.assertEqual(dataIds.toSet(), expectedDataIds)
1362 self.assertCountEqual(
1363 list(
1364 dataIds.findDatasets(
1365 flat,
1366 collections=["imported_r"],
1367 )
1368 ),
1369 expectedFlats,
1370 )
1371 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1372 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1373 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1374 self.assertCountEqual(
1375 list(
1376 subsetDataIds.findDatasets(
1377 bias,
1378 collections=["imported_r", "imported_g"],
1379 findFirst=False
1380 )
1381 ),
1382 expectedAllBiases
1383 )
1384 self.assertCountEqual(
1385 list(
1386 subsetDataIds.findDatasets(
1387 bias,
1388 collections=["imported_r", "imported_g"],
1389 findFirst=True
1390 )
1391 ), expectedDeduplicatedBiases
1392 )
1393 # Materialize the bias dataset queries (only) by putting the results
1394 # into temporary tables, then repeat those tests.
1395 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1396 findFirst=False).materialize() as biases:
1397 self.assertCountEqual(list(biases), expectedAllBiases)
1398 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1399 findFirst=True).materialize() as biases:
1400 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1401 # Materialize the data ID subset query, but not the dataset queries.
1402 with subsetDataIds.materialize() as subsetDataIds:
1403 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1404 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1405 self.assertCountEqual(
1406 list(
1407 subsetDataIds.findDatasets(
1408 bias,
1409 collections=["imported_r", "imported_g"],
1410 findFirst=False
1411 )
1412 ),
1413 expectedAllBiases
1414 )
1415 self.assertCountEqual(
1416 list(
1417 subsetDataIds.findDatasets(
1418 bias,
1419 collections=["imported_r", "imported_g"],
1420 findFirst=True
1421 )
1422 ), expectedDeduplicatedBiases
1423 )
1424 # Materialize the dataset queries, too.
1425 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1426 findFirst=False).materialize() as biases:
1427 self.assertCountEqual(list(biases), expectedAllBiases)
1428 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1429 findFirst=True).materialize() as biases:
1430 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1431 # Materialize the original query, but none of the follow-up queries.
1432 with dataIds.materialize() as dataIds:
1433 self.assertEqual(dataIds.graph, expectedGraph)
1434 self.assertEqual(dataIds.toSet(), expectedDataIds)
1435 self.assertCountEqual(
1436 list(
1437 dataIds.findDatasets(
1438 flat,
1439 collections=["imported_r"],
1440 )
1441 ),
1442 expectedFlats,
1443 )
1444 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1445 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1446 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1447 self.assertCountEqual(
1448 list(
1449 subsetDataIds.findDatasets(
1450 bias,
1451 collections=["imported_r", "imported_g"],
1452 findFirst=False
1453 )
1454 ),
1455 expectedAllBiases
1456 )
1457 self.assertCountEqual(
1458 list(
1459 subsetDataIds.findDatasets(
1460 bias,
1461 collections=["imported_r", "imported_g"],
1462 findFirst=True
1463 )
1464 ), expectedDeduplicatedBiases
1465 )
1466 # Materialize just the bias dataset queries.
1467 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1468 findFirst=False).materialize() as biases:
1469 self.assertCountEqual(list(biases), expectedAllBiases)
1470 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1471 findFirst=True).materialize() as biases:
1472 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1473 # Materialize the subset data ID query, but not the dataset
1474 # queries.
1475 with subsetDataIds.materialize() as subsetDataIds:
1476 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1477 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1478 self.assertCountEqual(
1479 list(
1480 subsetDataIds.findDatasets(
1481 bias,
1482 collections=["imported_r", "imported_g"],
1483 findFirst=False
1484 )
1485 ),
1486 expectedAllBiases
1487 )
1488 self.assertCountEqual(
1489 list(
1490 subsetDataIds.findDatasets(
1491 bias,
1492 collections=["imported_r", "imported_g"],
1493 findFirst=True
1494 )
1495 ), expectedDeduplicatedBiases
1496 )
1497 # Materialize the bias dataset queries, too, so now we're
1498 # materializing every single step.
1499 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1500 findFirst=False).materialize() as biases:
1501 self.assertCountEqual(list(biases), expectedAllBiases)
1502 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1503 findFirst=True).materialize() as biases:
1504 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
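Schematically, materialize() runs the wrapped query once, parks the rows in a temporary table, and lets follow-up queries read from that table until the context exits. A minimal DB-API sketch of that lifecycle (a hypothetical helper, not the registry's implementation):

import sqlite3
from contextlib import contextmanager

@contextmanager
def materialized(connection, select_sql, name="tmp_results"):
    # Execute the query once and park the rows in a temporary table...
    connection.execute(f"CREATE TEMPORARY TABLE {name} AS {select_sql}")
    try:
        yield name  # ...let follow-up queries read from it...
    finally:
        connection.execute(f"DROP TABLE {name}")  # ...and drop it on exit.

conn = sqlite3.connect(":memory:")
with materialized(conn, "SELECT 1 AS detector UNION SELECT 2") as table:
    print(conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone())  # (2,)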
1506 def testEmptyDimensionsQueries(self):
1507 """Test Query and QueryResults objects in the case where there are no
1508 dimensions.
1509 """
1510 # Set up test data: one dataset type, two runs, one dataset in each.
1511 registry = self.makeRegistry()
1512 self.loadData(registry, "base.yaml")
1513 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1514 registry.registerDatasetType(schema)
1515 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1516 run1 = "run1"
1517 run2 = "run2"
1518 registry.registerRun(run1)
1519 registry.registerRun(run2)
1520 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1521 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1522 # Query directly for both datasets at once, and then for each one individually.
1523 self.checkQueryResults(
1524 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False),
1525 [dataset1, dataset2]
1526 )
1527 self.checkQueryResults(
1528 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1529 [dataset1],
1530 )
1531 self.checkQueryResults(
1532 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1533 [dataset2],
1534 )
1535 # Query for data IDs with no dimensions.
1536 dataIds = registry.queryDataIds([])
1537 self.checkQueryResults(dataIds, [dataId])
1538 # Use queried data IDs to find the datasets.
1539 self.checkQueryResults(
1540 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1541 [dataset1, dataset2],
1542 )
1543 self.checkQueryResults(
1544 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1545 [dataset1],
1546 )
1547 self.checkQueryResults(
1548 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1549 [dataset2],
1550 )
1551 # Now materialize the data ID query results and repeat those tests.
1552 with dataIds.materialize() as dataIds:
1553 self.checkQueryResults(dataIds, [dataId])
1554 self.checkQueryResults(
1555 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1556 [dataset1],
1557 )
1558 self.checkQueryResults(
1559 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1560 [dataset2],
1561 )
1562 # Query for non-empty data IDs, then subset that to get the empty one.
1563 # Repeat the above tests starting from that.
1564 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1565 self.checkQueryResults(dataIds, [dataId])
1566 self.checkQueryResults(
1567 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1568 [dataset1, dataset2],
1569 )
1570 self.checkQueryResults(
1571 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1572 [dataset1],
1573 )
1574 self.checkQueryResults(
1575 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1576 [dataset2],
1577 )
1578 with dataIds.materialize() as dataIds:
1579 self.checkQueryResults(dataIds, [dataId])
1580 self.checkQueryResults(
1581 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1582 [dataset1, dataset2],
1583 )
1584 self.checkQueryResults(
1585 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1586 [dataset1],
1587 )
1588 self.checkQueryResults(
1589 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1590 [dataset2],
1591 )
1592 # Query for non-empty data IDs, then materialize, then subset to get
1593 # the empty one. Repeat again.
1594 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1595 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1596 self.checkQueryResults(dataIds, [dataId])
1597 self.checkQueryResults(
1598 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1599 [dataset1, dataset2],
1600 )
1601 self.checkQueryResults(
1602 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1603 [dataset1],
1604 )
1605 self.checkQueryResults(
1606 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1607 [dataset2],
1608 )
1609 with dataIds.materialize() as dataIds:
1610 self.checkQueryResults(
1611 dataIds,
1612 [dataId]
1613 )
1614 self.checkQueryResults(
1615 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1616 [dataset1, dataset2],
1617 )
1618 self.checkQueryResults(
1619 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1620 [dataset1],
1621 )
1622 self.checkQueryResults(
1623 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1624 [dataset2],
1625 )
1627 def testDimensionDataModifications(self):
1628 """Test that modifying dimension records via:
1629 syncDimensionData(..., update=True) and
1630 insertDimensionData(..., replace=True) works as expected, even in the
1631 presence of datasets using those dimensions and spatial overlap
1632 relationships.
1633 """
1635 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1636 """Unpack a sphgeom.RangeSet into the integers it contains.
1637 """
1638 for begin, end in ranges:
1639 yield from range(begin, end)
1641 def range_set_hull(
1642 ranges: lsst.sphgeom.RangeSet,
1643 pixelization: lsst.sphgeom.HtmPixelization,
1644 ) -> lsst.sphgeom.ConvexPolygon:
1645 """Create a ConvexPolygon hull of the region defined by a set of
1646 HTM pixelization index ranges.
1647 """
1648 points = []
1649 for index in unpack_range_set(ranges):
1650 points.extend(pixelization.triangle(index).getVertices())
1651 return lsst.sphgeom.ConvexPolygon(points)
1653 # Use HTM to set up an initial parent region (one arbitrary trixel)
1654 # and four child regions (the trixels within the parent at the next
1655 # level). We'll use the parent as a tract/visit region and the children
1656 # as its patch/visit_detector regions.
1657 registry = self.makeRegistry()
1658 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1659 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1660 index = 12288
1661 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1662 assert htm6.universe().contains(child_ranges_small)
1663 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1664 parent_region_small = lsst.sphgeom.ConvexPolygon(
1665 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1666 )
1667 assert all(parent_region_small.contains(c) for c in child_regions_small)
1668 # Make a larger version of each child region, defined to be the set of
1669 # htm6 trixels that overlap the original's bounding circle. Make a new
1670 # parent that's the convex hull of the new children.
1671 child_regions_large = [
1672 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6)
1673 for c in child_regions_small
1674 ]
1675 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1676 parent_region_large = lsst.sphgeom.ConvexPolygon(
1677 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1678 )
1679 assert all(parent_region_large.contains(c) for c in child_regions_large)
1680 assert parent_region_large.contains(parent_region_small)
1681 assert not parent_region_small.contains(parent_region_large)
1682 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1683 # Find some commonSkyPix indices that overlap the large regions but do
1684 # not overlap the small regions. We use commonSkyPix here to make sure the
1685 # real tests later involve what's in the database, not just post-query
1686 # region filtering.
1687 child_difference_indices = []
1688 for large, small in zip(child_regions_large, child_regions_small):
1689 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1690 assert difference, "if this is empty, we can't test anything useful with these regions"
1691 assert all(
1692 not commonSkyPix.triangle(d).isDisjointFrom(large)
1693 and commonSkyPix.triangle(d).isDisjointFrom(small)
1694 for d in difference
1695 )
1696 child_difference_indices.append(difference)
1697 parent_difference_indices = list(
1698 unpack_range_set(
1699 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1700 )
1701 )
1702 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1703 assert all(
1704 (
1705 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1706 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1707 )
1708 for d in parent_difference_indices
1709 )
1710 # Now that we've finally got those regions, we'll insert the large ones
1711 # as tract/patch dimension records.
1712 skymap_name = "testing_v1"
1713 registry.insertDimensionData(
1714 "skymap", {
1715 "name": skymap_name,
1716 "hash": bytes([42]),
1717 "tract_max": 1,
1718 "patch_nx_max": 2,
1719 "patch_ny_max": 2,
1720 }
1721 )
1722 registry.insertDimensionData(
1723 "tract",
1724 {"skymap": skymap_name, "id": 0, "region": parent_region_large}
1725 )
1726 registry.insertDimensionData(
1727 "patch",
1728 *[{
1729 "skymap": skymap_name,
1730 "tract": 0,
1731 "id": n,
1732 "cell_x": n % 2,
1733 "cell_y": n // 2,
1734 "region": c
1735 } for n, c in enumerate(child_regions_large)]
1736 )
1737 # Add a dataset that uses these dimensions to make sure that modifying
1738 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1739 # implement insert with replace=True as delete-then-insert).
1740 dataset_type = DatasetType(
1741 "coadd",
1742 dimensions=["tract", "patch"],
1743 universe=registry.dimensions,
1744 storageClass="Exposure",
1745 )
1746 registry.registerDatasetType(dataset_type)
1747 registry.registerCollection("the_run", CollectionType.RUN)
1748 registry.insertDatasets(
1749 dataset_type,
1750 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1751 run="the_run",
1752 )
1753 # Query for tracts and patches that overlap some "difference" commonSkyPix
1754 # pixels; there should be overlaps, because the database has
1755 # the "large" suite of regions.
1756 self.assertEqual(
1757 {0},
1758 {
1759 data_id["tract"] for data_id in registry.queryDataIds(
1760 ["tract"],
1761 skymap=skymap_name,
1762 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1763 )
1764 }
1765 )
1766 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1767 self.assertIn(
1768 patch_id,
1769 {
1770 data_id["patch"] for data_id in registry.queryDataIds(
1771 ["patch"],
1772 skymap=skymap_name,
1773 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1774 )
1775 }
1776 )
1777 # Use sync to update the tract region and insert to update the patch
1778 # regions, to the "small" suite.
1779 updated = registry.syncDimensionData(
1780 "tract",
1781 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1782 update=True,
1783 )
1784 self.assertEqual(updated, {"region": parent_region_large})
1785 registry.insertDimensionData(
1786 "patch",
1787 *[{
1788 "skymap": skymap_name,
1789 "tract": 0,
1790 "id": n,
1791 "cell_x": n % 2,
1792 "cell_y": n // 2,
1793 "region": c
1794 } for n, c in enumerate(child_regions_small)],
1795 replace=True
1796 )
1797 # Query again; there now should be no such overlaps, because the
1798 # database has the "small" suite of regions.
1799 self.assertFalse(
1800 set(
1801 registry.queryDataIds(
1802 ["tract"],
1803 skymap=skymap_name,
1804 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1805 )
1806 )
1807 )
1808 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1809 self.assertNotIn(
1810 patch_id,
1811 {
1812 data_id["patch"] for data_id in registry.queryDataIds(
1813 ["patch"],
1814 skymap=skymap_name,
1815 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1816 )
1817 }
1818 )
1819 # Update back to the large regions and query one more time.
1820 updated = registry.syncDimensionData(
1821 "tract",
1822 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1823 update=True,
1824 )
1825 self.assertEqual(updated, {"region": parent_region_small})
1826 registry.insertDimensionData(
1827 "patch",
1828 *[{
1829 "skymap": skymap_name,
1830 "tract": 0,
1831 "id": n,
1832 "cell_x": n % 2,
1833 "cell_y": n // 2,
1834 "region": c
1835 } for n, c in enumerate(child_regions_large)],
1836 replace=True
1837 )
1838 self.assertEqual(
1839 {0},
1840 {
1841 data_id["tract"] for data_id in registry.queryDataIds(
1842 ["tract"],
1843 skymap=skymap_name,
1844 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1845 )
1846 }
1847 )
1848 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1849 self.assertIn(
1850 patch_id,
1851 {
1852 data_id["patch"] for data_id in registry.queryDataIds(
1853 ["patch"],
1854 skymap=skymap_name,
1855 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1856 )
1857 }
1858 )
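The trixel bookkeeping above leans on HTM's index arithmetic: each trixel at one level has exactly four children at the next, with indices index*4 through index*4 + 3, which is what RangeSet.scaled(4) produces. A small standalone sketch, assuming lsst.sphgeom is importable:

import lsst.sphgeom

htm6 = lsst.sphgeom.HtmPixelization(6)
parent = 12288  # an htm5 trixel, as in the test above
children = lsst.sphgeom.RangeSet(parent).scaled(4)
print(list(children))  # [(49152, 49156)]: the four htm6 children 49152..49155
assert htm6.universe().contains(children)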
1860 def testCalibrationCollections(self):
1861 """Test operations on `~CollectionType.CALIBRATION` collections,
1862 including `Registry.certify`, `Registry.decertify`, and
1863 `Registry.findDataset`.
1864 """
1865 # Setup - make a Registry, fill it with some datasets in
1866 # non-calibration collections.
1867 registry = self.makeRegistry()
1868 self.loadData(registry, "base.yaml")
1869 self.loadData(registry, "datasets.yaml")
1870 # Set up some timestamps.
1871 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1872 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1873 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1874 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1875 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1876 allTimespans = [
1877 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1878 ]
1879 # Get references to some datasets.
1880 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1881 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1882 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1883 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1884 # Register the main calibration collection we'll be working with.
1885 collection = "Cam1/calibs/default"
1886 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1887 # Cannot associate into a calibration collection (no timespan).
1888 with self.assertRaises(TypeError):
1889 registry.associate(collection, [bias2a])
1890 # Certify 2a dataset with [t2, t4) validity.
1891 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1892 # We should not be able to certify 2b with anything overlapping that
1893 # window.
1894 with self.assertRaises(ConflictingDefinitionError):
1895 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1896 with self.assertRaises(ConflictingDefinitionError):
1897 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1898 with self.assertRaises(ConflictingDefinitionError):
1899 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1900 with self.assertRaises(ConflictingDefinitionError):
1901 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1902 with self.assertRaises(ConflictingDefinitionError):
1903 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1904 with self.assertRaises(ConflictingDefinitionError):
1905 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1906 with self.assertRaises(ConflictingDefinitionError):
1907 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1908 with self.assertRaises(ConflictingDefinitionError):
1909 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1910 # We should be able to certify 3a with a range overlapping that window,
1911 # because it's for a different detector.
1912 # We'll certify 3a over [t1, t3).
1913 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1914 # Now we'll certify 2b and 3b together over [t4, ∞).
1915 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1917 # Fetch all associations and check that they are what we expect.
1918 self.assertCountEqual(
1919 list(
1920 registry.queryDatasetAssociations(
1921 "bias",
1922 collections=[collection, "imported_g", "imported_r"],
1923 )
1924 ),
1925 [
1926 DatasetAssociation(
1927 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1928 collection="imported_g",
1929 timespan=None,
1930 ),
1931 DatasetAssociation(
1932 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1933 collection="imported_r",
1934 timespan=None,
1935 ),
1936 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1937 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1938 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1939 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1940 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1941 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1942 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1943 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1944 ]
1945 )
1947 class Ambiguous:
1948 """Tag class to denote lookups that are expected to be ambiguous.
1949 """
1950 pass
1952 def assertLookup(detector: int, timespan: Timespan,
1953 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1954 """Local function that asserts that a bias lookup returns the given
1955 expected result.
1956 """
1957 if expected is Ambiguous:
1958 with self.assertRaises(RuntimeError):
1959 registry.findDataset("bias", collections=collection, instrument="Cam1",
1960 detector=detector, timespan=timespan)
1961 else:
1962 self.assertEqual(
1963 expected,
1964 registry.findDataset("bias", collections=collection, instrument="Cam1",
1965 detector=detector, timespan=timespan)
1966 )
1968 # Systematically test lookups against expected results.
1969 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1970 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1971 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1972 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1973 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1974 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1975 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1976 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1977 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1978 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1979 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1980 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1981 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1982 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1983 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1984 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1985 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1986 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1987 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1988 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1989 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1990 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1991 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1992 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1993 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1994 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1995 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1996 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1997 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1998 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1999 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2000 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2001 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2002 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2003 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2004 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2005 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2006 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2007 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2008 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2009 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2010 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2012 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2013 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2014 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2015 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2016 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2017 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2018 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2019 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2020 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2021 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2022 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2023 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2024 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2025 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2026 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2027 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2028 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2029 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2030 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2031 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2032 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2033 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2034 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2035 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2036 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2037 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2038 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2039 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2040 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2041 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2042 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2043 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2044 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2045 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2046 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2047 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2048 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2049 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2050 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2051 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2052 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2053 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2054 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2055 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2056 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2057 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2059 # Decertify everything, this time with explicit data IDs, then check
2060 # that no lookups succeed.
2061 registry.decertify(
2062 collection, "bias", Timespan(None, None),
2063 dataIds=[
2064 dict(instrument="Cam1", detector=2),
2065 dict(instrument="Cam1", detector=3),
2066 ]
2067 )
2068 for detector in (2, 3):
2069 for timespan in allTimespans:
2070 assertLookup(detector=detector, timespan=timespan, expected=None)
2071 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2072 # those.
2073 registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
2074 for timespan in allTimespans:
2075 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2076 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2077 # Decertify just bias2 over [t2, t4).
2078 # This should split a single certification row into two (and leave the
2079 # other existing row, for bias3a, alone).
2080 registry.decertify(collection, "bias", Timespan(t2, t4),
2081 dataIds=[dict(instrument="Cam1", detector=2)])
2082 for timespan in allTimespans:
2083 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2084 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2085 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2086 if overlapsBefore and overlapsAfter:
2087 expected = Ambiguous
2088 elif overlapsBefore or overlapsAfter:
2089 expected = bias2a
2090 else:
2091 expected = None
2092 assertLookup(detector=2, timespan=timespan, expected=expected)
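The decertify behaviour verified above is interval subtraction: the decertified window is cut out of each matching validity range, truncating a row on one side or splitting it in two. A pure-Python sketch over half-open numeric intervals (the None-as-infinity bookkeeping is omitted):

def cut(valid, remove):
    # Return the pieces of half-open interval `valid` that survive
    # removing the half-open window `remove`.
    begin, end = valid
    pieces = []
    if begin < remove[0]:
        pieces.append((begin, min(end, remove[0])))
    if end > remove[1]:
        pieces.append((max(begin, remove[1]), end))
    return pieces

# Removing [2, 4) from [0, 9) splits one certification row into two, like
# the bias2a case above; a disjoint row comes back unchanged.
assert cut((0, 9), (2, 4)) == [(0, 2), (4, 9)]
assert cut((4, 9), (0, 2)) == [(4, 9)]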
2094 def testSkipCalibs(self):
2095 """Test how queries handle skipping of calibration collections.
2096 """
2097 registry = self.makeRegistry()
2098 self.loadData(registry, "base.yaml")
2099 self.loadData(registry, "datasets.yaml")
2101 coll_calib = "Cam1/calibs/default"
2102 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2104 # Add all biases to the calibration collection.
2105 # Without this, the logic that prunes dataset subqueries based on
2106 # datasetType-collection summary information will fire before the logic
2107 # we want to test below. This is a good thing (it avoids the dreaded
2108 # NotImplementedError a bit more often) everywhere but here.
2109 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2111 coll_list = [coll_calib, "imported_g", "imported_r"]
2112 chain = "Cam1/chain"
2113 registry.registerCollection(chain, type=CollectionType.CHAINED)
2114 registry.setCollectionChain(chain, coll_list)
2116 # explicit list will raise if findFirst=True or there are temporal
2117 # dimensions
2118 with self.assertRaises(NotImplementedError):
2119 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2120 with self.assertRaises(NotImplementedError):
2121 registry.queryDataIds(["instrument", "detector", "exposure"], datasets="bias",
2122 collections=coll_list).count()
2124 # chain will skip
2125 datasets = list(registry.queryDatasets("bias", collections=chain))
2126 self.assertGreater(len(datasets), 0)
2128 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias",
2129 collections=chain))
2130 self.assertGreater(len(dataIds), 0)
2132 # glob will skip too
2133 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2134 self.assertGreater(len(datasets), 0)
2136 # regular expression will skip too
2137 pattern = re.compile(".*")
2138 datasets = list(registry.queryDatasets("bias", collections=pattern))
2139 self.assertGreater(len(datasets), 0)
2141 # ellipsis should work as usual
2142 datasets = list(registry.queryDatasets("bias", collections=...))
2143 self.assertGreater(len(datasets), 0)
2145 # few tests with findFirst
2146 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2147 self.assertGreater(len(datasets), 0)
2149 def testIngestTimeQuery(self):
2151 registry = self.makeRegistry()
2152 self.loadData(registry, "base.yaml")
2153 dt0 = datetime.utcnow()
2154 self.loadData(registry, "datasets.yaml")
2155 dt1 = datetime.utcnow()
2157 datasets = list(registry.queryDatasets(..., collections=...))
2158 len0 = len(datasets)
2159 self.assertGreater(len0, 0)
2161 where = "ingest_date > T'2000-01-01'"
2162 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2163 len1 = len(datasets)
2164 self.assertEqual(len0, len1)
2166 # no one will ever use this piece of software in 30 years
2167 where = "ingest_date > T'2050-01-01'"
2168 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2169 len2 = len(datasets)
2170 self.assertEqual(len2, 0)
2172 # Check more exact timing to make sure there is no 37-second offset
2173 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2174 # sure that we don't test with higher precision.
2175 tests = [
2176 # format: (timestamp, operator, expected_len)
2177 (dt0 - timedelta(seconds=1), ">", len0),
2178 (dt0 - timedelta(seconds=1), "<", 0),
2179 (dt1 + timedelta(seconds=1), "<", len0),
2180 (dt1 + timedelta(seconds=1), ">", 0),
2181 ]
2182 for dt, op, expect_len in tests:
2183 dt_str = dt.isoformat(sep=" ")
2185 where = f"ingest_date {op} T'{dt_str}'"
2186 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2187 self.assertEqual(len(datasets), expect_len)
2189 # same with bind using datetime or astropy Time
2190 where = f"ingest_date {op} ingest_time"
2191 datasets = list(registry.queryDatasets(..., collections=..., where=where,
2192 bind={"ingest_time": dt}))
2193 self.assertEqual(len(datasets), expect_len)
2195 dt_astropy = astropy.time.Time(dt, format="datetime")
2196 datasets = list(registry.queryDatasets(..., collections=..., where=where,
2197 bind={"ingest_time": dt_astropy}))
2198 self.assertEqual(len(datasets), expect_len)
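The expression syntax used above wraps ISO timestamps in T'...' literals, and the same comparison can be phrased either by interpolating a literal or by binding a datetime/astropy value under a placeholder name. A tiny sketch of the string building, with hypothetical values:

from datetime import datetime

cutoff = datetime(2020, 1, 1, 12, 0, 0)
where_literal = f"ingest_date > T'{cutoff.isoformat(sep=' ')}'"
print(where_literal)  # ingest_date > T'2020-01-01 12:00:00'
# Equivalent via bind, keeping the expression itself free of literals:
where_bound = "ingest_date > my_cutoff"
bind = {"my_cutoff": cutoff}  # "my_cutoff" is an arbitrary placeholder name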
2200 def testTimespanQueries(self):
2201 """Test query expressions involving timespans.
2202 """
2203 registry = self.makeRegistry()
2204 self.loadData(registry, "hsc-rc2-subset.yaml")
2205 # All visits in the database; mapping from ID to timespan.
2206 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2207 # Just those IDs, sorted (which is also temporal sorting, because HSC
2208 # exposure IDs are monotonically increasing).
2209 ids = sorted(visits.keys())
2210 self.assertGreater(len(ids), 20)
2211 # Pick some quasi-random indexes into `ids` to play with.
2212 i1 = int(len(ids)*0.1)
2213 i2 = int(len(ids)*0.3)
2214 i3 = int(len(ids)*0.6)
2215 i4 = int(len(ids)*0.8)
2216 # Extract some times from those: just before the beginning of i1 (which
2217 # should be after the end of the previous visit), exactly the
2218 # beginning of i2, just after the beginning of i3 (and before its end),
2219 # and the exact end of i4.
2220 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2221 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2222 t2 = visits[ids[i2]].begin
2223 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2224 self.assertLess(t3, visits[ids[i3]].end)
2225 t4 = visits[ids[i4]].end
2226 # Make sure those are actually in order.
2227 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2229 bind = {
2230 "t1": t1,
2231 "t2": t2,
2232 "t3": t3,
2233 "t4": t4,
2234 "ts23": Timespan(t2, t3),
2235 }
2237 def query(where):
2238 """Helper function that queries for visit data IDs and returns
2239 results as a sorted, deduplicated list of visit IDs.
2240 """
2241 return sorted(
2242 {dataId["visit"] for dataId in registry.queryDataIds("visit",
2243 instrument="HSC",
2244 bind=bind,
2245 where=where)}
2246 )
2248 # Try a bunch of timespan queries, mixing up the bounds themselves,
2249 # where they appear in the expression, and how we get the timespan into
2250 # the expression.
2252 # t1 is before the start of i1, so this should not include i1.
2253 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2254 # t2 is exactly at the start of i2, but ends are exclusive, so these
2255 # should not include i2.
2256 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2257 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2258 # t3 is in the middle of i3, so this should include i3.
2259 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
2260 # This one should not include i3, by the same reasoning.
2261 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
2262 # t4 is exactly at the end of i4, so this should include i4.
2263 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2264 # i4's upper bound of t4 is exclusive, so this should not include i4.
2265 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
2267 # Now some timespan vs. time scalar queries.
2268 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2269 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2270 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
2271 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
2272 self.assertEqual(ids[i3:i3+1], query("visit.timespan OVERLAPS t3"))
2273 self.assertEqual(ids[i3:i3+1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2275 # Empty timespans should not overlap anything.
2276 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
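Every assertion above follows from one rule: timespans are half-open intervals [begin, end) with None meaning unbounded, so a shared endpoint is not an overlap and an empty interval overlaps nothing. A sketch with numeric stand-ins for the astropy times:

import math

def overlaps(a, b):
    # Half-open [begin, end) overlap test; None bounds mean unbounded.
    a0 = -math.inf if a[0] is None else a[0]
    a1 = math.inf if a[1] is None else a[1]
    b0 = -math.inf if b[0] is None else b[0]
    b1 = math.inf if b[1] is None else b[1]
    return max(a0, b0) < min(a1, b1)

assert not overlaps((1, 2), (2, 3))        # ends are exclusive
assert overlaps((1, 3), (2, None))         # open-ended upper bound
assert not overlaps((3, 2), (None, None))  # empty timespans overlap nothing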
2278 def testCollectionSummaries(self):
2279 """Test recording and retrieval of collection summaries.
2280 """
2281 self.maxDiff = None
2282 registry = self.makeRegistry()
2283 # Importing datasets from yaml should go through the code path where
2284 # we update collection summaries as we insert datasets.
2285 self.loadData(registry, "base.yaml")
2286 self.loadData(registry, "datasets.yaml")
2287 flat = registry.getDatasetType("flat")
2288 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
2289 expected1.datasetTypes.add(registry.getDatasetType("bias"))
2290 expected1.datasetTypes.add(flat)
2291 expected1.dimensions.update_extract(
2292 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
2293 )
2294 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2295 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2296 # Create a chained collection with both of the imported runs; the
2297 # summary should be the same, because it's a union with itself.
2298 chain = "chain"
2299 registry.registerCollection(chain, CollectionType.CHAINED)
2300 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2301 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2302 # Associate flats only into a tagged collection and a calibration
2303 # collection to check summaries of those.
2304 tag = "tag"
2305 registry.registerCollection(tag, CollectionType.TAGGED)
2306 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2307 calibs = "calibs"
2308 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2309 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
2310 timespan=Timespan(None, None))
2311 expected2 = expected1.copy()
2312 expected2.datasetTypes.discard("bias")
2313 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2314 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2315 # Explicitly calling Registry.refresh() should load those same
2316 # summaries, via a totally different code path.
2317 registry.refresh()
2318 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2319 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2320 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2321 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
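Conceptually a collection summary is a pair of sets, and the summary of a chained collection is the union of its children's summaries, which is why the chain above compares equal to expected1. A toy sketch (hypothetical structure, not CollectionSummary's real interface):

from dataclasses import dataclass, field

@dataclass
class ToySummary:
    dataset_types: set = field(default_factory=set)
    instruments: set = field(default_factory=set)

    def union(self, other):
        return ToySummary(self.dataset_types | other.dataset_types,
                          self.instruments | other.instruments)

g = ToySummary({"bias", "flat"}, {"Cam1"})
r = ToySummary({"bias", "flat"}, {"Cam1"})
assert g.union(r) == g  # a chain of identical runs summarizes identically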
2323 def testUnrelatedDimensionQueries(self):
2324 """Test that WHERE expressions in queries can reference dimensions that
2325 are not in the result set.
2326 """
2327 registry = self.makeRegistry()
2328 # There is no data to back this query, but it should still return
2329 # zero records instead of raising.
2330 self.assertFalse(
2331 set(registry.queryDataIds(["visit", "detector"],
2332 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
2333 )
2335 def testBindInQueryDatasets(self):
2336 """Test that the bind parameter is correctly forwarded in
2337 queryDatasets recursion.
2338 """
2339 registry = self.makeRegistry()
2340 # Importing datasets from yaml should go through the code path where
2341 # we update collection summaries as we insert datasets.
2342 self.loadData(registry, "base.yaml")
2343 self.loadData(registry, "datasets.yaml")
2344 self.assertEqual(
2345 set(registry.queryDatasets("flat", band="r", collections=...)),
2346 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2347 )
2349 def testQueryResultSummaries(self):
2350 """Test summary methods like `count`, `any`, and `explain_no_results`
2351 on `DataCoordinateQueryResults` and `DatasetQueryResults`
2352 """
2353 registry = self.makeRegistry()
2354 self.loadData(registry, "base.yaml")
2355 self.loadData(registry, "datasets.yaml")
2356 self.loadData(registry, "spatial.yaml")
2357 # Default test dataset has two collections, each with both flats and
2358 # biases. Add a new collection with only biases.
2359 registry.registerCollection("biases", CollectionType.TAGGED)
2360 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2361 # First query yields two results, and involves no postprocessing.
2362 query1 = registry.queryDataIds(["physical_filter"], band="r")
2363 self.assertTrue(query1.any(execute=False, exact=False))
2364 self.assertTrue(query1.any(execute=True, exact=False))
2365 self.assertTrue(query1.any(execute=True, exact=True))
2366 self.assertEqual(query1.count(exact=False), 2)
2367 self.assertEqual(query1.count(exact=True), 2)
2368 self.assertFalse(list(query1.explain_no_results()))
2369 # Second query should yield no results, but this isn't detectable
2370 # unless we actually run a query.
2371 query2 = registry.queryDataIds(["physical_filter"], band="h")
2372 self.assertTrue(query2.any(execute=False, exact=False))
2373 self.assertFalse(query2.any(execute=True, exact=False))
2374 self.assertFalse(query2.any(execute=True, exact=True))
2375 self.assertEqual(query2.count(exact=False), 0)
2376 self.assertEqual(query2.count(exact=True), 0)
2377 self.assertFalse(list(query2.explain_no_results()))
2378 # These queries yield no results due to various problems that can be
2379 # spotted prior to execution, yielding helpful diagnostics.
2380 for query, snippets in [
2381 (
2382 # Dataset type name doesn't match any existing dataset types.
2383 registry.queryDatasets("nonexistent", collections=...),
2384 ["nonexistent"],
2385 ),
2386 (
2387 # Dataset type name doesn't match any existing dataset types.
2388 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2389 ["nonexistent"],
2390 ),
2391 (
2392 # Dataset type object isn't registered.
2393 registry.queryDatasets(
2394 DatasetType(
2395 "nonexistent",
2396 dimensions=["instrument"],
2397 universe=registry.dimensions,
2398 storageClass="Image",
2399 ),
2400 collections=...
2401 ),
2402 ["nonexistent"],
2403 ),
2404 (
2405 # No datasets of this type in this collection.
2406 registry.queryDatasets("flat", collections=["biases"]),
2407 ["flat", "biases"],
2408 ),
2409 (
2410 # No collections matching at all.
2411 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2412 ["potato"],
2413 ),
2414 ]:
2416 self.assertFalse(query.any(execute=False, exact=False))
2417 self.assertFalse(query.any(execute=True, exact=False))
2418 self.assertFalse(query.any(execute=True, exact=True))
2419 self.assertEqual(query.count(exact=False), 0)
2420 self.assertEqual(query.count(exact=True), 0)
2421 messages = list(query.explain_no_results())
2422 self.assertTrue(messages)
2423 # Want all expected snippets to appear in at least one message.
2424 self.assertTrue(
2425 any(
2426 all(snippet in message for snippet in snippets)
2427 for message in query.explain_no_results()
2428 ),
2429 messages
2430 )
2432 # These queries yield no results due to problems that can be identified
2433 # by cheap follow-up queries, yielding helpful diagnostics.
2434 for query, snippets in [
2435 (
2436 # No records for one of the involved dimensions.
2437 registry.queryDataIds(["subfilter"]),
2438 ["dimension records", "subfilter"],
2439 ),
2440 ]:
2441 self.assertFalse(query.any(execute=True, exact=False))
2442 self.assertFalse(query.any(execute=True, exact=True))
2443 self.assertEqual(query.count(exact=True), 0)
2444 messages = list(query.explain_no_results())
2445 self.assertTrue(messages)
2446 # Want all expected snippets to appear in at least one message.
2447 self.assertTrue(
2448 any(
2449 all(snippet in message for snippet in snippets)
2450 for message in query.explain_no_results()
2451 ),
2452 messages
2453 )
2455 # This query yields four overlaps in the database, but one is filtered
2456 # out in postprocessing. The count queries aren't accurate because
2457 # they don't account for duplication that happens due to an internal
2458 # join against commonSkyPix.
2459 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2460 self.assertEqual(
2461 {
2462 DataCoordinate.standardize(
2463 instrument="Cam1",
2464 skymap="SkyMap1",
2465 visit=v,
2466 tract=t,
2467 universe=registry.dimensions,
2468 )
2469 for v, t in [(1, 0), (2, 0), (2, 1)]
2470 },
2471 set(query3),
2472 )
2473 self.assertTrue(query3.any(execute=False, exact=False))
2474 self.assertTrue(query3.any(execute=True, exact=False))
2475 self.assertTrue(query3.any(execute=True, exact=True))
2476 self.assertGreaterEqual(query3.count(exact=False), 4)
2477 self.assertGreaterEqual(query3.count(exact=True), 3)
2478 self.assertFalse(list(query3.explain_no_results()))
2479 # This query yields overlaps in the database, but all are filtered
2480 # out in postprocessing. The count queries again aren't very useful.
2481 # We have to use `where=` here to avoid an optimization that
2482 # (currently) skips the spatial postprocess-filtering because it
2483 # recognizes that no spatial join is necessary. That's not ideal, but
2484 # fixing it is out of scope for this ticket.
2485 query4 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1",
2486 where="visit=1 AND detector=1 AND tract=0 AND patch=4")
2487 self.assertFalse(set(query4))
2488 self.assertTrue(query4.any(execute=False, exact=False))
2489 self.assertTrue(query4.any(execute=True, exact=False))
2490 self.assertFalse(query4.any(execute=True, exact=True))
2491 self.assertGreaterEqual(query4.count(exact=False), 1)
2492 self.assertEqual(query4.count(exact=True), 0)
2493 messages = list(query4.explain_no_results())
2494 self.assertTrue(messages)
2495 self.assertTrue(
2496 any(
2497 "regions did not overlap" in message
2498 for message in messages
2499 )
2500 )
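The three levels of effort probed throughout this test can be condensed into a sketch (a hypothetical helper, not the real API): exact=False may be answered from SQL alone and can over-count, while exact=True must apply the same Python-side postprocessing that iteration would.

def count(rows, postprocess=None, exact=True):
    # rows: what the SQL query returns; postprocess: the Python-side
    # filter (e.g. precise region-overlap tests) applied on iteration.
    if not exact or postprocess is None:
        return len(rows)  # cheap upper bound straight from the database
    return sum(1 for row in rows if postprocess(row))

rows = ["overlap1", "overlap2", "false_positive"]
assert count(rows, exact=False) == 3  # may over-count
assert count(rows, postprocess=lambda r: r.startswith("overlap")) == 2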
2502 def testQueryDataIdsOrderBy(self):
2503 """Test order_by and limit on result returned by queryDataIds().
2504 """
2505 registry = self.makeRegistry()
2506 self.loadData(registry, "base.yaml")
2507 self.loadData(registry, "datasets.yaml")
2508 self.loadData(registry, "spatial.yaml")
2510 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2511 return registry.queryDataIds(dimensions, datasets=datasets, collections=collections,
2512 instrument="Cam1", skymap="SkyMap1")
2514 # query = do_query()
2515 # self.assertEqual(len(list(query)), 6)
2517 Test = namedtuple(
2518 "testQueryDataIdsOrderByTest",
2519 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2520 defaults=(None, None, None),
2521 )
2523 # For each test up to six items are defined here (in Test field order):
2524 # - order_by column names, comma separated
2525 # - DataId keys to extract, comma separated
2526 # - tuple of the resulting values we expect
2527 # - optional limit tuple, dataset type name, and collections
2528 test_data = (
2529 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2530 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2531 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2532 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2533 Test("tract.id,visit.id", "tract,visit", ((0, 1), (0, 1), (0, 2)), limit=(3, ),),
2534 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3, )),
2535 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2536 Test("-tract,-visit", "tract,visit", ((0, 1), ), limit=(3, 5)),
2537 Test("tract,visit.exposure_time", "tract,visit",
2538 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2539 Test("-tract,-visit.exposure_time", "tract,visit",
2540 ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2541 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2542 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2543 Test("tract,-timespan.begin,timespan.end", "tract,visit",
2544 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2545 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2546 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2547 Test("tract,detector", "tract,detector",
2548 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2549 datasets="flat", collections="imported_r"),
2550 Test("tract,detector.full_name", "tract,detector",
2551 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2552 datasets="flat", collections="imported_r"),
2553 Test("tract,detector.raft,detector.name_in_raft", "tract,detector",
2554 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2555 datasets="flat", collections="imported_r"),
2556 )
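# A minimal worked expansion of the first row above (a sketch: it
# duplicates what the loop below verifies, shown only to make the
# table-to-query mapping concrete).
first = test_data[0]
keys = first.keys.split(",")
dataIds = tuple(
    tuple(dataId[k] for k in keys)
    for dataId in do_query(keys).order_by(*first.order_by.split(","))
)
self.assertEqual(dataIds, first.result)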
2558 for test in test_data:
2559 order_by = test.order_by.split(",")
2560 keys = test.keys.split(",")
2561 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2562 if test.limit is not None:
2563 query = query.limit(*test.limit)
2564 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2565 self.assertEqual(dataIds, test.result)
2567 # Repeat via materialize(); the same ordering and limits must hold.
2568 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2569 if test.limit is not None:
2570 query = query.limit(*test.limit)
2571 with query.materialize() as materialized:
2572 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in materialized)
2573 self.assertEqual(dataIds, test.result)
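# The materialized round trip above matters because materialize()
# persists the result rows (a temporary table in the default SQL
# registry); the check confirms ordering and limits survive that trip.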
2575 # Malformed or unknown order_by arguments must raise ValueError.
2576 for order_by in ("", "-"):
2577 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2578 list(do_query().order_by(order_by))
2580 for order_by in ("undimension.name", "-undimension.name"):
2581 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2582 list(do_query().order_by(order_by))
2584 for order_by in ("attract", "-attract"):
2585 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2586 list(do_query().order_by(order_by))
2588 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2589 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2591 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2592 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2594 with self.assertRaisesRegex(ValueError,
2595 "Cannot find any temporal dimension element for 'timespan.begin'"):
2596 list(do_query(("tract")).order_by("timespan.begin"))
2598 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2599 list(do_query(("tract")).order_by("tract.timespan.begin"))
2601 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2602 list(do_query(("tract")).order_by("tract.name"))
2604 def testQueryDimensionRecordsOrderBy(self):
2605 """Test order_by and limit on result returned by
2606 queryDimensionRecords().
2607 """
2608 registry = self.makeRegistry()
2609 self.loadData(registry, "base.yaml")
2610 self.loadData(registry, "datasets.yaml")
2611 self.loadData(registry, "spatial.yaml")
2613 def do_query():
2614 return registry.queryDimensionRecords("detector", instrument="Cam1")
2616 query = do_query()
2617 self.assertEqual(len(list(query)), 4)
2619 # For each test three items are defined here:
2620 # - order_by column names, comma separated
2621 # - (limit, optional offset) tuple or None
2622 # - tuple of the expected detector IDs
2623 test_data = (
2624 ("detector", None, (1, 2, 3, 4)),
2625 ("-detector", None, (4, 3, 2, 1)),
2626 ("raft,-name_in_raft", None, (2, 1, 4, 3)),
2627 ("-detector.purpose", (1, ), (4, )),
2628 ("-purpose,detector.raft,name_in_raft", (2, 2), (2, 3)),
2629 )
2631 for order_by, limit, expected in test_data:
2632 order_by = order_by.split(",")
2633 query = do_query().order_by(*order_by)
2634 if limit is not None:
2635 query = query.limit(*limit)
2636 dataIds = tuple(rec.id for rec in query)
2637 self.assertEqual(dataIds, expected)
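# Hedged illustration of limit-with-offset (a sketch restating the last
# table row above): limit(2, 2) skips the first two records of the
# ordering and returns the next two, detectors 2 and 3.
query = do_query().order_by("-purpose", "detector.raft", "name_in_raft").limit(2, 2)
self.assertEqual(tuple(rec.id for rec in query), (2, 3))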
2639 # As above, bad order_by arguments should raise ValueError.
2640 for order_by in ("", "-"):
2641 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2642 list(do_query().order_by(order_by))
2644 for order_by in ("undimension.name", "-undimension.name"):
2645 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2646 list(do_query().order_by(order_by))
2648 for order_by in ("attract", "-attract"):
2649 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2650 list(do_query().order_by(order_by))
2652 def testDatasetConstrainedDimensionRecordQueries(self):
2653 """Test that queryDimensionRecords works even when given a dataset
2654 constraint whose dimensions extend beyond the requested dimension
2655 element's.
2656 """
2657 registry = self.makeRegistry()
2658 self.loadData(registry, "base.yaml")
2659 self.loadData(registry, "datasets.yaml")
2660 # Query for physical_filter dimension records, using a dataset type
2661 # ("flat") whose dimensions include detector as well as physical_filter.
2662 records = registry.queryDimensionRecords(
2663 "physical_filter",
2664 datasets=["flat"],
2665 collections="imported_r",
2666 )
2667 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
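# The point of this test: the "flat" dataset search joins in detector
# as well as physical_filter, and the query must still project the
# results back down to physical_filter records alone.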