# Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 4% of 1352 statements
# (coverage.py v6.5.0, created at 2023-01-07 02:05 -0800)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
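
    # A minimal sketch (not part of the original file) of how a subclass
    # might pin the managers under test by overriding the class members
    # above; the fully-qualified manager path shown is an illustrative
    # assumption:
    #
    #     class NameKeyCollectionsRegistryTests(RegistryTests):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections"
    #             ".nameKey.NameKeyCollectionManager"
    #         )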

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
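
    # A minimal sketch (assumptions noted inline) of a file-backed SQLite
    # implementation of ``makeRegistry``; ``self.root`` is a hypothetical
    # per-test temporary directory, not an attribute of this base class:
    #
    #     def makeRegistry(self, share_repo_with=None):
    #         config = self.makeRegistryConfig()
    #         config["db"] = f"sqlite:///{self.root}/gen3.sqlite3"
    #         if share_repo_with is None:
    #             return Registry.createFromConfig(config)
    #         return Registry.fromConfig(config)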

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)
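
    # For orientation, a YAML import/export file of the kind ``loadData``
    # consumes is a sequence of typed sections; this fragment is an
    # illustrative assumption about the layout, not a copy of the real
    # ``base.yaml``:
    #
    #     description: Butler Data Repository Export
    #     version: 0
    #     data:
    #     - type: dimension
    #       element: instrument
    #       records:
    #       - name: Cam1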

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
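
    # Typical usage from a test method, sketched under the assumption that
    # the registry has been populated with ``base.yaml`` and
    # ``datasets.yaml`` (where "imported_g" holds bias datasets for
    # detectors 1-3):
    #
    #     results = registry.queryDatasets("bias", collections="imported_g")
    #     expected = [
    #         registry.findDataset(
    #             "bias", instrument="Cam1", detector=d, collections="imported_g"
    #         )
    #         for d in (1, 2, 3)
    #     ]
    #     self.checkQueryResults(results, expected)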

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but it
        # looks like it is much higher in practice.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert.
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True.
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work...
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # ...except when the definitions are not identical.
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None.
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail.
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail.
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure.
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral.
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises.
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None.
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change.
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes.
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error.
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId.
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run.
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique ID generation modes; such datasets can be
        # re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):

                # Use an integer dataset ID to force UUID calculation in
                # _importDatasets.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK.
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run.
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run.
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
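
    # Aside (an assumption about the implementation, stated for context):
    # version-5 UUIDs are deterministic, name-based hashes, which is what
    # lets the DATAID_TYPE/DATAID_TYPE_RUN modes recompute the same ID on
    # re-import.  A hypothetical illustration with the standard library
    # (the namespace and key format here are made up):
    #
    #     import uuid
    #     namespace = uuid.UUID("00000000-0000-0000-0000-000000000000")
    #     name = "bias:instrument=Cam1,detector=1"
    #     assert uuid.uuid5(namespace, name) == uuid.uuid5(namespace, name)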

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset type is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data1", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data1" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data1", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # A search for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should be found both directly
        # in run2 and via chain2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist.
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap.
        """
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types and collections.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # Note that only 3 of 5 detectors have datasets.
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # Second collection.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # With two input collections.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # Limit to a single visit.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # A more limiting expression, using link names instead of
        # Table.column names.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # An expression that excludes everything.
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter: it is not in the dimensions, but it is
        # a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # Dataset types.
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With an empty expression.
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception.
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins."""
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
                    for (dataId1, region1), (dataId2, region2) in itertools.product(
                        regions[element1.name].items(), regions[element2.name].items()
                    )
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [
                DataCoordinate.standardize(band="i", universe=registry.dimensions),
                DataCoordinate.standardize(band="r", universe=registry.dimensions),
            ],
        )
1254 def testAttributeManager(self):
1255 """Test basic functionality of attribute manager."""
1256 # number of attributes with schema versions in a fresh database,
1257 # 6 managers with 3 records per manager, plus config for dimensions
1258 VERSION_COUNT = 6 * 3 + 1
1260 registry = self.makeRegistry()
1261 attributes = registry._managers.attributes
1263 # check what get() returns for non-existing key
1264 self.assertIsNone(attributes.get("attr"))
1265 self.assertEqual(attributes.get("attr", ""), "")
1266 self.assertEqual(attributes.get("attr", "Value"), "Value")
1267 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1269 # cannot store empty key or value
1270 with self.assertRaises(ValueError):
1271 attributes.set("", "value")
1272 with self.assertRaises(ValueError):
1273 attributes.set("attr", "")
1275 # set value of non-existing key
1276 attributes.set("attr", "value")
1277 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1278 self.assertEqual(attributes.get("attr"), "value")
1280 # update value of existing key
1281 with self.assertRaises(ButlerAttributeExistsError):
1282 attributes.set("attr", "value2")
1284 attributes.set("attr", "value2", force=True)
1285 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1286 self.assertEqual(attributes.get("attr"), "value2")
1288 # delete existing key
1289 self.assertTrue(attributes.delete("attr"))
1290 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1292 # delete non-existing key
1293 self.assertFalse(attributes.delete("non-attr"))
1295 # store a bunch of keys and get the list back
1296 data = [
1297 ("version.core", "1.2.3"),
1298 ("version.dimensions", "3.2.1"),
1299 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1300 ]
1301 for key, value in data:
1302 attributes.set(key, value)
1303 items = dict(attributes.items())
1304 for key, value in data:
1305 self.assertEqual(items[key], value)
1307 def testQueryDatasetsDeduplication(self):
1308 """Test that the findFirst option to queryDatasets selects datasets
1309 from collections in the order given".
1310 """
1311 registry = self.makeRegistry()
1312 self.loadData(registry, "base.yaml")
1313 self.loadData(registry, "datasets.yaml")
1314 self.assertCountEqual(
1315 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1316 [
1317 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1318 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1319 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1320 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1321 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1322 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1323 ],
1324 )
1325 self.assertCountEqual(
1326 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1327 [
1328 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1329 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1330 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1331 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1332 ],
1333 )
1334 self.assertCountEqual(
1335 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1336 [
1337 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1338 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1339 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1340 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1341 ],
1342 )
1344 def testQueryResults(self):
1345 """Test querying for data IDs and then manipulating the QueryResults
1346 object returned to perform other queries.
1347 """
1348 registry = self.makeRegistry()
1349 self.loadData(registry, "base.yaml")
1350 self.loadData(registry, "datasets.yaml")
1351 bias = registry.getDatasetType("bias")
1352 flat = registry.getDatasetType("flat")
1353 # Obtain expected results from methods other than those we're testing
1354 # here. That includes:
1355 # - the dimensions of the data IDs we want to query:
1356 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1357 # - the dimensions of some other data IDs we'll extract from that:
1358 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1359 # - the data IDs we expect to obtain from the first queries:
1360 expectedDataIds = DataCoordinateSet(
1361 {
1362 DataCoordinate.standardize(
1363 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1364 )
1365 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1366 },
1367 graph=expectedGraph,
1368 hasFull=False,
1369 hasRecords=False,
1370 )
1371 # - the flat datasets we expect to find from those data IDs, in just
1372 # one collection (so deduplication is irrelevant):
1373 expectedFlats = [
1374 registry.findDataset(
1375 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1376 ),
1377 registry.findDataset(
1378 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1379 ),
1380 registry.findDataset(
1381 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1382 ),
1383 ]
1384 # - the data IDs we expect to extract from that:
1385 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1386 # - the bias datasets we expect to find from those data IDs, after we
1387 # subset-out the physical_filter dimension, both with duplicates:
1388 expectedAllBiases = [
1389 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1390 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1391 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1392 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1393 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1394 ]
1395 # - ...and without duplicates:
1396 expectedDeduplicatedBiases = [
1397 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1398 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1399 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1400 ]
1401 # Test against those expected results, using a "lazy" query for the
1402 # data IDs (which re-executes that query each time we use it to do
1403 # something new).
1404 dataIds = registry.queryDataIds(
1405 ["detector", "physical_filter"],
1406 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1407 instrument="Cam1",
1408 )
1409 self.assertEqual(dataIds.graph, expectedGraph)
1410 self.assertEqual(dataIds.toSet(), expectedDataIds)
1411 self.assertCountEqual(
1412 list(
1413 dataIds.findDatasets(
1414 flat,
1415 collections=["imported_r"],
1416 )
1417 ),
1418 expectedFlats,
1419 )
1420 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1421 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1422 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1423 self.assertCountEqual(
1424 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1425 expectedAllBiases,
1426 )
1427 self.assertCountEqual(
1428 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1429 expectedDeduplicatedBiases,
1430 )
1432 # Mismatched dimensions should raise: "flat" needs physical_filter, which these data IDs lack.
1433 with self.assertRaises(ValueError):
1434 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True)
1436 # Use a component dataset type.
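# (makeComponentRef derives a ref for one component of the composite;
# the component dataset type is conventionally named "bias.image" and
# shares the parent's data ID.)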
1437 self.assertCountEqual(
1438 [
1439 ref.makeComponentRef("image")
1440 for ref in subsetDataIds.findDatasets(
1441 bias,
1442 collections=["imported_r", "imported_g"],
1443 findFirst=False,
1444 )
1445 ],
1446 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1447 )
1449 # Use a named dataset type that does not exist and a dataset type
1450 # object that does not exist.
1451 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1453 # Test both string name and dataset type object.
1454 test_type: Union[str, DatasetType]
1455 for test_type, test_type_name in (
1456 (unknown_type, unknown_type.name),
1457 (unknown_type.name, unknown_type.name),
1458 ):
1459 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1460 list(
1461 subsetDataIds.findDatasets(
1462 test_type, collections=["imported_r", "imported_g"], findFirst=True
1463 )
1464 )
1466 # Materialize the bias dataset queries (only) by putting the results
1467 # into temporary tables, then repeat those tests.
1468 with subsetDataIds.findDatasets(
1469 bias, collections=["imported_r", "imported_g"], findFirst=False
1470 ).materialize() as biases:
1471 self.assertCountEqual(list(biases), expectedAllBiases)
1472 with subsetDataIds.findDatasets(
1473 bias, collections=["imported_r", "imported_g"], findFirst=True
1474 ).materialize() as biases:
1475 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1476 # Materialize the data ID subset query, but not the dataset queries.
1477 with subsetDataIds.materialize() as subsetDataIds:
1478 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1479 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1480 self.assertCountEqual(
1481 list(
1482 subsetDataIds.findDatasets(
1483 bias, collections=["imported_r", "imported_g"], findFirst=False
1484 )
1485 ),
1486 expectedAllBiases,
1487 )
1488 self.assertCountEqual(
1489 list(
1490 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1491 ),
1492 expectedDeduplicatedBiases,
1493 )
1494 # Materialize the dataset queries, too.
1495 with subsetDataIds.findDatasets(
1496 bias, collections=["imported_r", "imported_g"], findFirst=False
1497 ).materialize() as biases:
1498 self.assertCountEqual(list(biases), expectedAllBiases)
1499 with subsetDataIds.findDatasets(
1500 bias, collections=["imported_r", "imported_g"], findFirst=True
1501 ).materialize() as biases:
1502 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1503 # Materialize the original query, but none of the follow-up queries.
1504 with dataIds.materialize() as dataIds:
1505 self.assertEqual(dataIds.graph, expectedGraph)
1506 self.assertEqual(dataIds.toSet(), expectedDataIds)
1507 self.assertCountEqual(
1508 list(
1509 dataIds.findDatasets(
1510 flat,
1511 collections=["imported_r"],
1512 )
1513 ),
1514 expectedFlats,
1515 )
1516 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1517 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1518 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1519 self.assertCountEqual(
1520 list(
1521 subsetDataIds.findDatasets(
1522 bias, collections=["imported_r", "imported_g"], findFirst=False
1523 )
1524 ),
1525 expectedAllBiases,
1526 )
1527 self.assertCountEqual(
1528 list(
1529 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1530 ),
1531 expectedDeduplicatedBiases,
1532 )
1533 # Materialize just the bias dataset queries.
1534 with subsetDataIds.findDatasets(
1535 bias, collections=["imported_r", "imported_g"], findFirst=False
1536 ).materialize() as biases:
1537 self.assertCountEqual(list(biases), expectedAllBiases)
1538 with subsetDataIds.findDatasets(
1539 bias, collections=["imported_r", "imported_g"], findFirst=True
1540 ).materialize() as biases:
1541 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1542 # Materialize the subset data ID query, but not the dataset
1543 # queries.
1544 with subsetDataIds.materialize() as subsetDataIds:
1545 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1546 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1547 self.assertCountEqual(
1548 list(
1549 subsetDataIds.findDatasets(
1550 bias, collections=["imported_r", "imported_g"], findFirst=False
1551 )
1552 ),
1553 expectedAllBiases,
1554 )
1555 self.assertCountEqual(
1556 list(
1557 subsetDataIds.findDatasets(
1558 bias, collections=["imported_r", "imported_g"], findFirst=True
1559 )
1560 ),
1561 expectedDeduplicatedBiases,
1562 )
1563 # Materialize the bias dataset queries, too, so now we're
1564 # materializing every single step.
1565 with subsetDataIds.findDatasets(
1566 bias, collections=["imported_r", "imported_g"], findFirst=False
1567 ).materialize() as biases:
1568 self.assertCountEqual(list(biases), expectedAllBiases)
1569 with subsetDataIds.findDatasets(
1570 bias, collections=["imported_r", "imported_g"], findFirst=True
1571 ).materialize() as biases:
1572 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1574 def testStorageClassPropagation(self):
1575 """Test that queries for datasets respect the storage class passed in
1576 as part of a full dataset type.
1577 """
1578 registry = self.makeRegistry()
1579 self.loadData(registry, "base.yaml")
1580 dataset_type_in_registry = DatasetType(
1581 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions
1582 )
1583 registry.registerDatasetType(dataset_type_in_registry)
1584 run = "run1"
1585 registry.registerRun(run)
1586 (inserted_ref,) = registry.insertDatasets(
1587 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1588 )
1589 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1590 query_dataset_type = DatasetType(
1591 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions
1592 )
1593 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1594 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1595 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1596 (query_datasets_ref,) = query_datasets_result
1597 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1598 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1599 query_dataset_type, collections=[run]
1600 )
1601 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1602 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1603 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1604 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1605 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1606 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1607 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1609 def testEmptyDimensionsQueries(self):
1610 """Test Query and QueryResults objects in the case where there are no
1611 dimensions.
1612 """
1613 # Set up test data: one dataset type, two runs, one dataset in each.
1614 registry = self.makeRegistry()
1615 self.loadData(registry, "base.yaml")
1616 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1617 registry.registerDatasetType(schema)
1618 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1619 run1 = "run1"
1620 run2 = "run2"
1621 registry.registerRun(run1)
1622 registry.registerRun(run2)
1623 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1624 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1625 # Query directly for both datasets at once, and then for each one alone via find-first ordering.
1626 self.checkQueryResults(
1627 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1628 )
1629 self.checkQueryResults(
1630 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1631 [dataset1],
1632 )
1633 self.checkQueryResults(
1634 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1635 [dataset2],
1636 )
1637 # Query for data IDs with no dimensions.
1638 dataIds = registry.queryDataIds([])
1639 self.checkQueryResults(dataIds, [dataId])
1640 # Use queried data IDs to find the datasets.
1641 self.checkQueryResults(
1642 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1643 [dataset1, dataset2],
1644 )
1645 self.checkQueryResults(
1646 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1647 [dataset1],
1648 )
1649 self.checkQueryResults(
1650 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1651 [dataset2],
1652 )
1653 # Now materialize the data ID query results and repeat those tests.
1654 with dataIds.materialize() as dataIds:
1655 self.checkQueryResults(dataIds, [dataId])
1656 self.checkQueryResults(
1657 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1658 [dataset1],
1659 )
1660 self.checkQueryResults(
1661 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1662 [dataset2],
1663 )
1664 # Query for non-empty data IDs, then subset that to get the empty one.
1665 # Repeat the above tests starting from that.
1666 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1667 self.checkQueryResults(dataIds, [dataId])
1668 self.checkQueryResults(
1669 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1670 [dataset1, dataset2],
1671 )
1672 self.checkQueryResults(
1673 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1674 [dataset1],
1675 )
1676 self.checkQueryResults(
1677 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1678 [dataset2],
1679 )
1680 with dataIds.materialize() as dataIds:
1681 self.checkQueryResults(dataIds, [dataId])
1682 self.checkQueryResults(
1683 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1684 [dataset1, dataset2],
1685 )
1686 self.checkQueryResults(
1687 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1688 [dataset1],
1689 )
1690 self.checkQueryResults(
1691 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1692 [dataset2],
1693 )
1694 # Query for non-empty data IDs, then materialize, then subset to get
1695 # the empty one. Repeat again.
1696 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1697 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1698 self.checkQueryResults(dataIds, [dataId])
1699 self.checkQueryResults(
1700 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1701 [dataset1, dataset2],
1702 )
1703 self.checkQueryResults(
1704 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1705 [dataset1],
1706 )
1707 self.checkQueryResults(
1708 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1709 [dataset2],
1710 )
1711 with dataIds.materialize() as dataIds:
1712 self.checkQueryResults(dataIds, [dataId])
1713 self.checkQueryResults(
1714 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1715 [dataset1, dataset2],
1716 )
1717 self.checkQueryResults(
1718 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1719 [dataset1],
1720 )
1721 self.checkQueryResults(
1722 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1723 [dataset2],
1724 )
1725 # Query for non-empty data IDs with a constraint on an empty-data-ID
1726 # dataset that exists.
1727 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1728 self.checkQueryResults(
1729 dataIds.subset(unique=True),
1730 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1731 )
1732 # Again query for non-empty data IDs with a constraint on empty-data-ID
1733 # datasets, but when the datasets don't exist. We delete the existing
1734 # dataset and query just that collection rather than creating a new
1735 # empty collection because this is a bit less likely for our build-time
1736 # logic to shortcut-out (via the collection summaries), and such a
1737 # shortcut would make this test a bit more trivial than we'd like.
1738 registry.removeDatasets([dataset2])
1739 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1740 self.checkQueryResults(dataIds, [])
1742 def testDimensionDataModifications(self):
1743 """Test that modifying dimension records via:
1744 syncDimensionData(..., update=True) and
1745 insertDimensionData(..., replace=True) works as expected, even in the
1746 presence of datasets using those dimensions and spatial overlap
1747 relationships.
1748 """
1750 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1751 """Unpack a sphgeom.RangeSet into the integers it contains."""
1752 for begin, end in ranges:
1753 yield from range(begin, end)
1755 def range_set_hull(
1756 ranges: lsst.sphgeom.RangeSet,
1757 pixelization: lsst.sphgeom.HtmPixelization,
1758 ) -> lsst.sphgeom.ConvexPolygon:
1759 """Create a ConvexPolygon hull of the region defined by a set of
1760 HTM pixelization index ranges.
1761 """
1762 points = []
1763 for index in unpack_range_set(ranges):
1764 points.extend(pixelization.triangle(index).getVertices())
1765 return lsst.sphgeom.ConvexPolygon(points)
1767 # Use HTM to set up an initial parent region (one arbitrary trixel)
1768 # and four child regions (the trixels within the parent at the next
1769 # level). We'll use the parent as a tract/visit region and the children
1770 # as its patch/visit_detector regions.
1771 registry = self.makeRegistry()
1772 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1773 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1774 index = 12288
1775 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1776 assert htm6.universe().contains(child_ranges_small)
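# (HTM indices nest by factors of four: trixel `index` at one level has
# children 4*index .. 4*index + 3 at the next level, which is exactly
# what RangeSet.scaled(4) produced above.)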
1777 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1778 parent_region_small = lsst.sphgeom.ConvexPolygon(
1779 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1780 )
1781 assert all(parent_region_small.contains(c) for c in child_regions_small)
1782 # Make a larger version of each child region, defined to be the set of
1783 # htm6 trixels that overlap the original's bounding circle. Make a new
1784 # parent that's the convex hull of the new children.
1785 child_regions_large = [
1786 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1787 ]
1788 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1789 parent_region_large = lsst.sphgeom.ConvexPolygon(
1790 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1791 )
1792 assert all(parent_region_large.contains(c) for c in child_regions_large)
1793 assert parent_region_large.contains(parent_region_small)
1794 assert not parent_region_small.contains(parent_region_large)
1795 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1796 # Find some commonSkyPix indices that overlap the large regions but do
1797 # not overlap the small regions. We use commonSkyPix here to make sure the
1798 # real tests later involve what's in the database, not just post-query
1799 # filtering of regions.
1800 child_difference_indices = []
1801 for large, small in zip(child_regions_large, child_regions_small):
1802 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1803 assert difference, "if this is empty, we can't test anything useful with these regions"
1804 assert all(
1805 not commonSkyPix.triangle(d).isDisjointFrom(large)
1806 and commonSkyPix.triangle(d).isDisjointFrom(small)
1807 for d in difference
1808 )
1809 child_difference_indices.append(difference)
1810 parent_difference_indices = list(
1811 unpack_range_set(
1812 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1813 )
1814 )
1815 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1816 assert all(
1817 (
1818 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1819 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1820 )
1821 for d in parent_difference_indices
1822 )
1823 # Now that we've finally got those regions, we'll insert the large ones
1824 # as tract/patch dimension records.
1825 skymap_name = "testing_v1"
1826 registry.insertDimensionData(
1827 "skymap",
1828 {
1829 "name": skymap_name,
1830 "hash": bytes([42]),
1831 "tract_max": 1,
1832 "patch_nx_max": 2,
1833 "patch_ny_max": 2,
1834 },
1835 )
1836 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1837 registry.insertDimensionData(
1838 "patch",
1839 *[
1840 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1841 for n, c in enumerate(child_regions_large)
1842 ],
1843 )
1844 # Add a dataset that uses these dimensions to make sure that modifying
1845 # them doesn't disrupt foreign keys (we need to make sure the database
1846 # doesn't implement insert with replace=True as delete-then-insert).
1847 dataset_type = DatasetType(
1848 "coadd",
1849 dimensions=["tract", "patch"],
1850 universe=registry.dimensions,
1851 storageClass="Exposure",
1852 )
1853 registry.registerDatasetType(dataset_type)
1854 registry.registerCollection("the_run", CollectionType.RUN)
1855 registry.insertDatasets(
1856 dataset_type,
1857 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1858 run="the_run",
1859 )
1860 # Query for tracts and patches that overlap some "difference" htm9
1861 # pixels; there should be overlaps, because the database has
1862 # the "large" suite of regions.
1863 self.assertEqual(
1864 {0},
1865 {
1866 data_id["tract"]
1867 for data_id in registry.queryDataIds(
1868 ["tract"],
1869 skymap=skymap_name,
1870 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1871 )
1872 },
1873 )
1874 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1875 self.assertIn(
1876 patch_id,
1877 {
1878 data_id["patch"]
1879 for data_id in registry.queryDataIds(
1880 ["patch"],
1881 skymap=skymap_name,
1882 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1883 )
1884 },
1885 )
1886 # Use sync to update the tract region and insert to update the regions
1887 # of the patches, to the "small" suite.
1888 updated = registry.syncDimensionData(
1889 "tract",
1890 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1891 update=True,
1892 )
1893 self.assertEqual(updated, {"region": parent_region_large})
1894 registry.insertDimensionData(
1895 "patch",
1896 *[
1897 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1898 for n, c in enumerate(child_regions_small)
1899 ],
1900 replace=True,
1901 )
1902 # Query again; there now should be no such overlaps, because the
1903 # database has the "small" suite of regions.
1904 self.assertFalse(
1905 set(
1906 registry.queryDataIds(
1907 ["tract"],
1908 skymap=skymap_name,
1909 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1910 )
1911 )
1912 )
1913 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1914 self.assertNotIn(
1915 patch_id,
1916 {
1917 data_id["patch"]
1918 for data_id in registry.queryDataIds(
1919 ["patch"],
1920 skymap=skymap_name,
1921 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1922 )
1923 },
1924 )
1925 # Update back to the large regions and query one more time.
1926 updated = registry.syncDimensionData(
1927 "tract",
1928 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1929 update=True,
1930 )
1931 self.assertEqual(updated, {"region": parent_region_small})
1932 registry.insertDimensionData(
1933 "patch",
1934 *[
1935 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1936 for n, c in enumerate(child_regions_large)
1937 ],
1938 replace=True,
1939 )
1940 self.assertEqual(
1941 {0},
1942 {
1943 data_id["tract"]
1944 for data_id in registry.queryDataIds(
1945 ["tract"],
1946 skymap=skymap_name,
1947 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1948 )
1949 },
1950 )
1951 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1952 self.assertIn(
1953 patch_id,
1954 {
1955 data_id["patch"]
1956 for data_id in registry.queryDataIds(
1957 ["patch"],
1958 skymap=skymap_name,
1959 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1960 )
1961 },
1962 )
1964 def testCalibrationCollections(self):
1965 """Test operations on `~CollectionType.CALIBRATION` collections,
1966 including `Registry.certify`, `Registry.decertify`, and
1967 `Registry.findDataset`.
1968 """
1969 # Setup - make a Registry, fill it with some datasets in
1970 # non-calibration collections.
1971 registry = self.makeRegistry()
1972 self.loadData(registry, "base.yaml")
1973 self.loadData(registry, "datasets.yaml")
1974 # Set up some timestamps.
1975 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
1976 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
1977 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
1978 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
1979 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
1980 allTimespans = [
1981 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1982 ]
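# Note the None at both ends of the input list: combinations() thus
# yields every bounded window (ti, tj), every half-unbounded window
# (None, ti) and (ti, None), and the fully unbounded (None, None).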
1983 # Get references to some datasets.
1984 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1985 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1986 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1987 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1988 # Register the main calibration collection we'll be working with.
1989 collection = "Cam1/calibs/default"
1990 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1991 # Cannot associate into a calibration collection (no timespan).
1992 with self.assertRaises(CollectionTypeError):
1993 registry.associate(collection, [bias2a])
1994 # Certify 2a dataset with [t2, t4) validity.
1995 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1996 # Test that we can query for this dataset via the new collection, both
1997 # on its own and with a RUN collection, as long as we don't try to join
1998 # in temporal dimensions or use findFirst=True.
1999 self.assertEqual(
2000 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2001 {bias2a},
2002 )
2003 self.assertEqual(
2004 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2005 {
2006 bias2a,
2007 bias2b,
2008 bias3b,
2009 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2010 },
2011 )
2012 self.assertEqual(
2013 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2014 {registry.expandDataId(instrument="Cam1", detector=2)},
2015 )
2016 self.assertEqual(
2017 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2018 {
2019 registry.expandDataId(instrument="Cam1", detector=2),
2020 registry.expandDataId(instrument="Cam1", detector=3),
2021 registry.expandDataId(instrument="Cam1", detector=4),
2022 },
2023 )
2025 # We should not be able to certify 2b with anything overlapping that
2026 # window.
2027 with self.assertRaises(ConflictingDefinitionError):
2028 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2029 with self.assertRaises(ConflictingDefinitionError):
2030 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2031 with self.assertRaises(ConflictingDefinitionError):
2032 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2033 with self.assertRaises(ConflictingDefinitionError):
2034 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2035 with self.assertRaises(ConflictingDefinitionError):
2036 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2037 with self.assertRaises(ConflictingDefinitionError):
2038 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2039 with self.assertRaises(ConflictingDefinitionError):
2040 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2041 with self.assertRaises(ConflictingDefinitionError):
2042 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2043 # We should be able to certify 3a with a range overlapping that window,
2044 # because it's for a different detector.
2045 # We'll certify 3a over [t1, t3).
2046 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2047 # Now we'll certify 2b and 3b together over [t4, ∞).
2048 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2050 # Fetch all associations and check that they are what we expect.
2051 self.assertCountEqual(
2052 list(
2053 registry.queryDatasetAssociations(
2054 "bias",
2055 collections=[collection, "imported_g", "imported_r"],
2056 )
2057 ),
2058 [
2059 DatasetAssociation(
2060 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2061 collection="imported_g",
2062 timespan=None,
2063 ),
2064 DatasetAssociation(
2065 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2066 collection="imported_r",
2067 timespan=None,
2068 ),
2069 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2070 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2071 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2072 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2073 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2074 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2075 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2076 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2077 ],
2078 )
2080 class Ambiguous:
2081 """Tag class to denote lookups that should be ambiguous."""
2083 pass
2085 def assertLookup(
2086 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2087 ) -> None:
2088 """Local function that asserts that a bias lookup returns the given
2089 expected result.
2090 """
2091 if expected is Ambiguous:
2092 with self.assertRaises((DatasetTypeError, LookupError)):
2093 registry.findDataset(
2094 "bias",
2095 collections=collection,
2096 instrument="Cam1",
2097 detector=detector,
2098 timespan=timespan,
2099 )
2100 else:
2101 self.assertEqual(
2102 expected,
2103 registry.findDataset(
2104 "bias",
2105 collections=collection,
2106 instrument="Cam1",
2107 detector=detector,
2108 timespan=timespan,
2109 ),
2110 )
2112 # Systematically test lookups against expected results.
2113 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2114 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2115 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2116 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2117 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2118 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2119 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2120 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2121 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2122 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2123 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2124 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2125 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2126 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2127 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2128 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2129 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2130 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2131 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2132 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2133 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2134 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2135 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2136 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2137 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2138 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2139 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2140 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2141 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2142 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2143 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2144 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2145 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2146 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2147 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2148 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2149 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2150 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2151 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2152 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2153 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2154 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2156 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2157 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2158 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2159 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2160 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2161 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2162 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2163 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2164 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2165 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2166 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2167 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2168 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2169 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2170 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2171 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2172 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2173 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2174 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2175 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2176 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2177 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2178 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2179 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2180 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2181 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2182 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2183 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2184 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2185 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2186 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2187 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2188 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2189 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2190 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2191 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2192 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2193 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2194 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2195 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2196 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2197 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2198 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2199 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2200 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2201 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2203 # Decertify everything, this time with explicit data IDs, then check
2204 # that no lookups succeed.
2205 registry.decertify(
2206 collection,
2207 "bias",
2208 Timespan(None, None),
2209 dataIds=[
2210 dict(instrument="Cam1", detector=2),
2211 dict(instrument="Cam1", detector=3),
2212 ],
2213 )
2214 for detector in (2, 3):
2215 for timespan in allTimespans:
2216 assertLookup(detector=detector, timespan=timespan, expected=None)
2217 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2218 # those.
2219 registry.certify(
2220 collection,
2221 [bias2a, bias3a],
2222 Timespan(None, None),
2223 )
2224 for timespan in allTimespans:
2225 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2226 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2227 # Decertify just bias2 over [t2, t4).
2228 # This should split a single certification row into two (and leave the
2229 # other existing row, for bias3a, alone).
2230 registry.decertify(
2231 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2232 )
2233 for timespan in allTimespans:
2234 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2235 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2236 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2237 if overlapsBefore and overlapsAfter:
2238 expected = Ambiguous
2239 elif overlapsBefore or overlapsAfter:
2240 expected = bias2a
2241 else:
2242 expected = None
2243 assertLookup(detector=2, timespan=timespan, expected=expected)
2245 def testSkipCalibs(self):
2246 """Test how queries handle skipping of calibration collections."""
2247 registry = self.makeRegistry()
2248 self.loadData(registry, "base.yaml")
2249 self.loadData(registry, "datasets.yaml")
2251 coll_calib = "Cam1/calibs/default"
2252 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2254 # Add all biases to the calibration collection.
2255 # Without this, the logic that prunes dataset subqueries based on
2256 # datasetType-collection summary information will fire before the logic
2257 # we want to test below. This is a good thing (it avoids the dreaded
2258 # NotImplementedError a bit more often) everywhere but here.
2259 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2261 coll_list = [coll_calib, "imported_g", "imported_r"]
2262 chain = "Cam1/chain"
2263 registry.registerCollection(chain, type=CollectionType.CHAINED)
2264 registry.setCollectionChain(chain, coll_list)
2266 # An explicit collection list will raise if findFirst=True or if there
2267 # are temporal dimensions involved.
2268 with self.assertRaises(NotImplementedError):
2269 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2270 with self.assertRaises(NotImplementedError):
2271 registry.queryDataIds(
2272 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2273 ).count()
2276 # A chained collection will skip the calibration collection instead of raising.
2276 datasets = list(registry.queryDatasets("bias", collections=chain))
2277 self.assertGreater(len(datasets), 0)
2279 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2280 self.assertGreater(len(dataIds), 0)
2282 # glob will skip too
2283 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2284 self.assertGreater(len(datasets), 0)
2286 # regular expression will skip too
2287 pattern = re.compile(".*")
2288 datasets = list(registry.queryDatasets("bias", collections=pattern))
2289 self.assertGreater(len(datasets), 0)
2291 # ellipsis should work as usual
2292 datasets = list(registry.queryDatasets("bias", collections=...))
2293 self.assertGreater(len(datasets), 0)
2295 # A few tests with findFirst.
2296 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2297 self.assertGreater(len(datasets), 0)
2299 def testIngestTimeQuery(self):
"""Test queries with expressions involving the dataset ingest_date."""
2301 registry = self.makeRegistry()
2302 self.loadData(registry, "base.yaml")
2303 dt0 = datetime.utcnow()
2304 self.loadData(registry, "datasets.yaml")
2305 dt1 = datetime.utcnow()
2307 datasets = list(registry.queryDatasets(..., collections=...))
2308 len0 = len(datasets)
2309 self.assertGreater(len0, 0)
2311 where = "ingest_date > T'2000-01-01'"
2312 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2313 len1 = len(datasets)
2314 self.assertEqual(len0, len1)
2316 # no one will ever use this piece of software in 30 years
2317 where = "ingest_date > T'2050-01-01'"
2318 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2319 len2 = len(datasets)
2320 self.assertEqual(len2, 0)
2322 # Check more exact timing to make sure there is no 37-second offset
2323 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2324 # sure that we don't test with higher precision.
2325 tests = [
2326 # format: (timestamp, operator, expected_len)
2327 (dt0 - timedelta(seconds=1), ">", len0),
2328 (dt0 - timedelta(seconds=1), "<", 0),
2329 (dt1 + timedelta(seconds=1), "<", len0),
2330 (dt1 + timedelta(seconds=1), ">", 0),
2331 ]
2332 for dt, op, expect_len in tests:
2333 dt_str = dt.isoformat(sep=" ")
2335 where = f"ingest_date {op} T'{dt_str}'"
2336 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2337 self.assertEqual(len(datasets), expect_len)
2339 # same with bind using datetime or astropy Time
2340 where = f"ingest_date {op} ingest_time"
2341 datasets = list(
2342 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2343 )
2344 self.assertEqual(len(datasets), expect_len)
2346 dt_astropy = astropy.time.Time(dt, format="datetime")
2347 datasets = list(
2348 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2349 )
2350 self.assertEqual(len(datasets), expect_len)
2352 def testTimespanQueries(self):
2353 """Test query expressions involving timespans."""
2354 registry = self.makeRegistry()
2355 self.loadData(registry, "hsc-rc2-subset.yaml")
2356 # All visits in the database; mapping from visit ID to timespan.
2357 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2358 # Just those IDs, sorted (which is also temporal sorting, because HSC
2359 # visit IDs are monotonically increasing).
2360 ids = sorted(visits.keys())
2361 self.assertGreater(len(ids), 20)
2362 # Pick some quasi-random indexes into `ids` to play with.
2363 i1 = int(len(ids) * 0.1)
2364 i2 = int(len(ids) * 0.3)
2365 i3 = int(len(ids) * 0.6)
2366 i4 = int(len(ids) * 0.8)
2367 # Extract some times from those: just before the beginning of i1 (which
2368 # should be after the end of the previous visit), exactly the
2369 # beginning of i2, just after the beginning of i3 (and before its end),
2370 # and the exact end of i4.
2371 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2372 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2373 t2 = visits[ids[i2]].begin
2374 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2375 self.assertLess(t3, visits[ids[i3]].end)
2376 t4 = visits[ids[i4]].end
2377 # Make sure those are actually in order.
2378 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2380 bind = {
2381 "t1": t1,
2382 "t2": t2,
2383 "t3": t3,
2384 "t4": t4,
2385 "ts23": Timespan(t2, t3),
2386 }
2388 def query(where):
2389 """Helper function that queries for visit data IDs and returns
2390 results as a sorted, deduplicated list of visit IDs.
2391 """
2392 return sorted(
2393 {
2394 dataId["visit"]
2395 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2396 }
2397 )
2399 # Try a bunch of timespan queries, mixing up the bounds themselves,
2400 # where they appear in the expression, and how we get the timespan into
2401 # the expression.
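# (Recall that Timespan bounds are half-open: begin is inclusive and end
# is exclusive.)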
2403 # t1 is before the start of i1, so this should not include i1.
2404 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2405 # t2 is exactly at the start of i2, but ends are exclusive, so these
2406 # should not include i2.
2407 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2408 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2409 # t3 is in the middle of i3, so this should include i3.
2410 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2411 # This one should not include i3, by the same reasoning.
2412 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2413 # t4 is exactly at the end of i4, so this should include i4.
2414 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2415 # i4's upper bound of t4 is exclusive, so this should not include i4.
2416 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2418 # Now some timespan vs. time scalar queries.
2419 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2420 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2421 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2422 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2423 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2424 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2426 # Empty timespans should not overlap anything.
2427 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2429 def testCollectionSummaries(self):
2430 """Test recording and retrieval of collection summaries."""
2431 self.maxDiff = None
2432 registry = self.makeRegistry()
2433 # Importing datasets from yaml should go through the code path where
2434 # we update collection summaries as we insert datasets.
2435 self.loadData(registry, "base.yaml")
2436 self.loadData(registry, "datasets.yaml")
2437 flat = registry.getDatasetType("flat")
2438 expected1 = CollectionSummary()
2439 expected1.dataset_types.add(registry.getDatasetType("bias"))
2440 expected1.add_data_ids(
2441 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2442 )
2443 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2444 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2445 # Create a chained collection with both of the imported runs; the
2446 # summary should be the same, because it's a union with itself.
2447 chain = "chain"
2448 registry.registerCollection(chain, CollectionType.CHAINED)
2449 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2450 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2451 # Associate flats only into a tagged collection and a calibration
2452 # collection to check summaries of those.
2453 tag = "tag"
2454 registry.registerCollection(tag, CollectionType.TAGGED)
2455 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2456 calibs = "calibs"
2457 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2458 registry.certify(
2459 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2460 )
2461 expected2 = expected1.copy()
2462 expected2.dataset_types.discard("bias")
2463 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2464 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2465 # Explicitly calling Registry.refresh() should load those same
2466 # summaries, via a totally different code path.
2467 registry.refresh()
2468 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2469 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2470 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2471 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2473 def testBindInQueryDatasets(self):
2474 """Test that the bind parameter is correctly forwarded in
2475 queryDatasets recursion.
2476 """
2477 registry = self.makeRegistry()
2478 # Load the standard test data.
2480 self.loadData(registry, "base.yaml")
2481 self.loadData(registry, "datasets.yaml")
2482 self.assertEqual(
2483 set(registry.queryDatasets("flat", band="r", collections=...)),
2484 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2485 )
2487 def testQueryIntRangeExpressions(self):
2488 """Test integer range expressions in ``where`` arguments.
2490 Note that our expressions use inclusive stop values, unlike Python's.
2491 """
2492 registry = self.makeRegistry()
2493 self.loadData(registry, "base.yaml")
2494 self.assertEqual(
2495 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2496 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2497 )
2498 self.assertEqual(
2499 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2500 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2501 )
2502 self.assertEqual(
2503 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2504 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2505 )
2507 def testQueryResultSummaries(self):
2508 """Test summary methods like `count`, `any`, and `explain_no_results`
2509 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2510 """
2511 registry = self.makeRegistry()
2512 self.loadData(registry, "base.yaml")
2513 self.loadData(registry, "datasets.yaml")
2514 self.loadData(registry, "spatial.yaml")
2515 # Default test dataset has two collections, each with both flats and
2516 # biases. Add a new collection with only biases.
2517 registry.registerCollection("biases", CollectionType.TAGGED)
2518 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2519 # First query yields two results, and involves no postprocessing.
2520 query1 = registry.queryDataIds(["physical_filter"], band="r")
2521 self.assertTrue(query1.any(execute=False, exact=False))
2522 self.assertTrue(query1.any(execute=True, exact=False))
2523 self.assertTrue(query1.any(execute=True, exact=True))
2524 self.assertEqual(query1.count(exact=False), 2)
2525 self.assertEqual(query1.count(exact=True), 2)
2526 self.assertFalse(list(query1.explain_no_results()))
2527 # Second query should yield no results, which we should see when
2528 # we attempt to expand the data ID.
2529 query2 = registry.queryDataIds(["physical_filter"], band="h")
2530 # There's no execute=False, exact=False test here because the behavior
2531 # is not something we want to guarantee in this case (and exact=False
2532 # says either answer is legal).
2533 self.assertFalse(query2.any(execute=True, exact=False))
2534 self.assertFalse(query2.any(execute=True, exact=True))
2535 self.assertEqual(query2.count(exact=False), 0)
2536 self.assertEqual(query2.count(exact=True), 0)
2537 self.assertTrue(list(query2.explain_no_results()))
2538 # These queries yield no results due to various problems that can be
2539 # spotted prior to execution, yielding helpful diagnostics.
2540 base_query = registry.queryDataIds(["detector", "physical_filter"])
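# base_query is a DataCoordinateQueryResults; calling findDatasets on it
# below exercises the same diagnostics through a results-object method.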
2541 queries_and_snippets = [
2542 (
2543 # Dataset type name doesn't match any existing dataset types.
2544 registry.queryDatasets("nonexistent", collections=...),
2545 ["nonexistent"],
2546 ),
2547 (
2548 # Dataset type object isn't registered.
2549 registry.queryDatasets(
2550 DatasetType(
2551 "nonexistent",
2552 dimensions=["instrument"],
2553 universe=registry.dimensions,
2554 storageClass="Image",
2555 ),
2556 collections=...,
2557 ),
2558 ["nonexistent"],
2559 ),
2560 (
2561 # No datasets of this type in this collection.
2562 registry.queryDatasets("flat", collections=["biases"]),
2563 ["flat", "biases"],
2564 ),
2565 (
2566 # No datasets of this type in this collection.
2567 base_query.findDatasets("flat", collections=["biases"]),
2568 ["flat", "biases"],
2569 ),
2570 (
2571 # No collections matching at all.
2572 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2573 ["potato"],
2574 ),
2575 ]
2576 # The behavior of these additional queries is slated to change in the
2577 # future, so we also check for deprecation warnings.
2578 with self.assertWarns(FutureWarning):
2579 queries_and_snippets.append(
2580 (
2581 # Dataset type name doesn't match any existing dataset
2582 # types.
2583 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2584 ["nonexistent"],
2585 )
2586 )
2587 with self.assertWarns(FutureWarning):
2588 queries_and_snippets.append(
2589 (
2590 # Dataset type name doesn't match any existing dataset
2591 # types.
2592 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2593 ["nonexistent"],
2594 )
2595 )
2596 for query, snippets in queries_and_snippets:
2597 self.assertFalse(query.any(execute=False, exact=False))
2598 self.assertFalse(query.any(execute=True, exact=False))
2599 self.assertFalse(query.any(execute=True, exact=True))
2600 self.assertEqual(query.count(exact=False), 0)
2601 self.assertEqual(query.count(exact=True), 0)
2602 messages = list(query.explain_no_results())
2603 self.assertTrue(messages)
2604 # Want all expected snippets to appear in at least one message.
2605 self.assertTrue(
2606 any(
2607 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2608 ),
2609 messages,
2610 )
2612 # This query does yield results, but should also emit a warning because
2613 # passing dataset type patterns to queryDataIds is deprecated; just look
2614 # for the warning.
2615 with self.assertWarns(FutureWarning):
2616 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2618 # These queries yield no results due to problems that can be identified
2619 # by cheap follow-up queries, yielding helpful diagnostics.
2620 for query, snippets in [
2621 (
2622 # No records for one of the involved dimensions.
2623 registry.queryDataIds(["subfilter"]),
2624 ["no rows", "subfilter"],
2625 ),
2626 (
2627 # No records for one of the involved dimensions.
2628 registry.queryDimensionRecords("subfilter"),
2629 ["no rows", "subfilter"],
2630 ),
2631 ]:
2632 self.assertFalse(query.any(execute=True, exact=False))
2633 self.assertFalse(query.any(execute=True, exact=True))
2634 self.assertEqual(query.count(exact=True), 0)
2635 messages = list(query.explain_no_results())
2636 self.assertTrue(messages)
2637 # Want all expected snippets to appear in at least one message.
2638 self.assertTrue(
2639 any(
2640 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2641 ),
2642 messages,
2643 )
2645 # This query yields four overlaps in the database, but one is filtered
2646 # out in postprocessing. The count queries aren't accurate because
2647 # they don't account for duplication that happens due to an internal
2648 # join against commonSkyPix.
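# (Roughly: exact=False counts raw database rows, so it is only a lower
# bound here, and discard=True lets the exact count drop the
# postprocess-filtered rows rather than error out.)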
2649 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2650 self.assertEqual(
2651 {
2652 DataCoordinate.standardize(
2653 instrument="Cam1",
2654 skymap="SkyMap1",
2655 visit=v,
2656 tract=t,
2657 universe=registry.dimensions,
2658 )
2659 for v, t in [(1, 0), (2, 0), (2, 1)]
2660 },
2661 set(query3),
2662 )
2663 self.assertTrue(query3.any(execute=False, exact=False))
2664 self.assertTrue(query3.any(execute=True, exact=False))
2665 self.assertTrue(query3.any(execute=True, exact=True))
2666 self.assertGreaterEqual(query3.count(exact=False), 4)
2667 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2668 self.assertFalse(list(query3.explain_no_results()))
2669 # This query yields overlaps in the database, but all are filtered
2670 # out in postprocessing. The count queries again aren't very useful.
2671 # We have to use `where=` here to avoid an optimization that
2672 # (currently) skips the spatial postprocess-filtering because it
2673 # recognizes that no spatial join is necessary. That's not ideal, but
2674 # fixing it is out of scope for this ticket.
2675 query4 = registry.queryDataIds(
2676 ["visit", "tract"],
2677 instrument="Cam1",
2678 skymap="SkyMap1",
2679 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2680 )
2681 self.assertFalse(set(query4))
2682 self.assertTrue(query4.any(execute=False, exact=False))
2683 self.assertTrue(query4.any(execute=True, exact=False))
2684 self.assertFalse(query4.any(execute=True, exact=True))
2685 self.assertGreaterEqual(query4.count(exact=False), 1)
2686 self.assertEqual(query4.count(exact=True, discard=True), 0)
2687 messages = query4.explain_no_results()
2688 self.assertTrue(messages)
2689 self.assertTrue(any("overlap" in message for message in messages))
2690 # This query should yield results from one dataset type but not the
2691 # other, which is not registered.
2692 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2693 self.assertTrue(set(query5))
2694 self.assertTrue(query5.any(execute=False, exact=False))
2695 self.assertTrue(query5.any(execute=True, exact=False))
2696 self.assertTrue(query5.any(execute=True, exact=True))
2697 self.assertGreaterEqual(query5.count(exact=False), 1)
2698 self.assertGreaterEqual(query5.count(exact=True), 1)
2699 self.assertFalse(list(query5.explain_no_results()))
2700 # This query applies a selection that yields no results, fully in the
2701 # database. Explaining why it fails involves traversing the relation
2702 # tree and running a LIMIT 1 query at each level that has the potential
2703 # to remove rows.
2704 query6 = registry.queryDimensionRecords(
2705 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2706 )
2707 self.assertEqual(query6.count(exact=True), 0)
2708 messages = query6.explain_no_results()
2709 self.assertTrue(messages)
2710 self.assertTrue(any("no-purpose" in message for message in messages))
2712 def testQueryDataIdsOrderBy(self):
2713 """Test order_by and limit on result returned by queryDataIds()."""
2714 registry = self.makeRegistry()
2715 self.loadData(registry, "base.yaml")
2716 self.loadData(registry, "datasets.yaml")
2717 self.loadData(registry, "spatial.yaml")
2719 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2720 return registry.queryDataIds(
2721 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2722 )
2724 Test = namedtuple(
2725 "testQueryDataIdsOrderByTest",
2726 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2727 defaults=(None, None, None),
2728 )
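# order_by terms are comma-separated; a leading "-" reverses the sort,
# and a term may be a dimension name, a bare metadata field, or an
# "element.field" reference such as "visit.exposure_time" or
# "timespan.begin".  ``limit`` holds (limit[, offset]); e.g. limit=(3, 3)
# skips the first three ordered rows and returns at most three more.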
2730 test_data = (
2731 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2732 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2733 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2734 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2735 Test(
2736 "tract.id,visit.id",
2737 "tract,visit",
2738 ((0, 1), (0, 1), (0, 2)),
2739 limit=(3,),
2740 ),
2741 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2742 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2743 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2744 Test(
2745 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2746 ),
2747 Test(
2748 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2749 ),
2750 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2751 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2752 Test(
2753 "tract,-timespan.begin,timespan.end",
2754 "tract,visit",
2755 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2756 ),
2757 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2758 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2759 Test(
2760 "tract,detector",
2761 "tract,detector",
2762 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2763 datasets="flat",
2764 collections="imported_r",
2765 ),
2766 Test(
2767 "tract,detector.full_name",
2768 "tract,detector",
2769 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2770 datasets="flat",
2771 collections="imported_r",
2772 ),
2773 Test(
2774 "tract,detector.raft,detector.name_in_raft",
2775 "tract,detector",
2776 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2777 datasets="flat",
2778 collections="imported_r",
2779 ),
2780 )
2782 for test in test_data:
2783 order_by = test.order_by.split(",")
2784 keys = test.keys.split(",")
2785 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2786 if test.limit is not None:
2787 query = query.limit(*test.limit)
2788 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2789 self.assertEqual(dataIds, test.result)
2791 # Materializing a query with order_by/limit applied is not supported;
# expect RelationalAlgebraError.
2792 query = do_query(keys).order_by(*order_by)
2793 if test.limit is not None:
2794 query = query.limit(*test.limit)
2795 with self.assertRaises(RelationalAlgebraError):
2796 with query.materialize():
2797 pass
2799 # errors in a name
2800 for order_by in ("", "-"):
2801 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2802 list(do_query().order_by(order_by))
2804 for order_by in ("undimension.name", "-undimension.name"):
2805 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2806 list(do_query().order_by(order_by))
2808 for order_by in ("attract", "-attract"):
2809 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2810 list(do_query().order_by(order_by))
2812 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2813 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2815 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2816 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2818 with self.assertRaisesRegex(
2819 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2820 ):
2821 list(do_query(("tract")).order_by("timespan.begin"))
2823 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2824 list(do_query(("tract")).order_by("tract.timespan.begin"))
2826 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2827 list(do_query(("tract")).order_by("tract.name"))
2829 def testQueryDataIdsGovernorExceptions(self):
2830 """Test exceptions raised by queryDataIds() for incorrect governors."""
2831 registry = self.makeRegistry()
2832 self.loadData(registry, "base.yaml")
2833 self.loadData(registry, "datasets.yaml")
2834 self.loadData(registry, "spatial.yaml")
2836 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
2837 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2839 Test = namedtuple(
2840 "testQueryDataIdExceptionsTest",
2841 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2842 defaults=(None, None, None, {}, None, 0),
2843 )
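# instrument and skymap are "governor" dimensions: the registry knows
# their full set of values, so naming an unknown value (whether via
# kwargs, dataId, where, or bind) raises DataIdValueError up front
# rather than returning an empty result.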
2845 test_data = (
2846 Test("tract,visit", count=6),
2847 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2848 Test(
2849 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2850 ),
2851 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2852 Test(
2853 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2854 ),
2855 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2856 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2857 Test(
2858 "tract,visit",
2859 where="instrument=cam AND skymap=map",
2860 bind={"cam": "Cam1", "map": "SkyMap1"},
2861 count=6,
2862 ),
2863 Test(
2864 "tract,visit",
2865 where="instrument=cam AND skymap=map",
2866 bind={"cam": "Cam", "map": "SkyMap"},
2867 exception=DataIdValueError,
2868 ),
2869 )
2871 for test in test_data:
2872 dimensions = test.dimensions.split(",")
2873 if test.exception:
2874 with self.assertRaises(test.exception):
2875 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2876 else:
2877 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2878 self.assertEqual(query.count(discard=True), test.count)
2880 # Repeat the same checks with the query materialized into a
# temporary table.
2881 if test.exception:
2882 with self.assertRaises(test.exception):
2883 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2884 with query.materialize() as materialized:
2885 materialized.count(discard=True)
2886 else:
2887 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2888 with query.materialize() as materialized:
2889 self.assertEqual(materialized.count(discard=True), test.count)
2891 def testQueryDimensionRecordsOrderBy(self):
2892 """Test order_by and limit on result returned by
2893 queryDimensionRecords().
2894 """
2895 registry = self.makeRegistry()
2896 self.loadData(registry, "base.yaml")
2897 self.loadData(registry, "datasets.yaml")
2898 self.loadData(registry, "spatial.yaml")
2900 def do_query(element, datasets=None, collections=None):
2901 return registry.queryDimensionRecords(
2902 element, instrument="Cam1", datasets=datasets, collections=collections
2903 )
2905 query = do_query("detector")
2906 self.assertEqual(len(list(query)), 4)
2908 Test = namedtuple(
2909 "testQueryDataIdsOrderByTest",
2910 ("element", "order_by", "result", "limit", "datasets", "collections"),
2911 defaults=(None, None, None),
2912 )
2914 test_data = (
2915 Test("detector", "detector", (1, 2, 3, 4)),
2916 Test("detector", "-detector", (4, 3, 2, 1)),
2917 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2918 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2919 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2920 Test("visit", "visit", (1, 2)),
2921 Test("visit", "-visit.id", (2, 1)),
2922 Test("visit", "zenith_angle", (1, 2)),
2923 Test("visit", "-visit.name", (2, 1)),
2924 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2925 )
2927 for test in test_data:
2928 order_by = test.order_by.split(",")
2929 query = do_query(test.element).order_by(*order_by)
2930 if test.limit is not None:
2931 query = query.limit(*test.limit)
2932 dataIds = tuple(rec.id for rec in query)
2933 self.assertEqual(dataIds, test.result)
2935 # errors in a name
2936 for order_by in ("", "-"):
2937 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2938 list(do_query("detector").order_by(order_by))
2940 for order_by in ("undimension.name", "-undimension.name"):
2941 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
2942 list(do_query("detector").order_by(order_by))
2944 for order_by in ("attract", "-attract"):
2945 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
2946 list(do_query("detector").order_by(order_by))
2948 def testQueryDimensionRecordsExceptions(self):
2949 """Test exceptions raised by queryDimensionRecords()."""
2950 registry = self.makeRegistry()
2951 self.loadData(registry, "base.yaml")
2952 self.loadData(registry, "datasets.yaml")
2953 self.loadData(registry, "spatial.yaml")
2955 result = registry.queryDimensionRecords("detector")
2956 self.assertEqual(result.count(), 4)
2957 result = registry.queryDimensionRecords("detector", instrument="Cam1")
2958 self.assertEqual(result.count(), 4)
2959 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
2960 self.assertEqual(result.count(), 4)
2961 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
2962 self.assertEqual(result.count(), 4)
2963 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
2964 self.assertEqual(result.count(), 4)
2966 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
2967 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
2968 result.count()
2970 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
2971 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
2972 result.count()
2974 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2975 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
2976 result.count()
2978 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
2979 result = registry.queryDimensionRecords(
2980 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
2981 )
2982 result.count()
2984 def testDatasetConstrainedDimensionRecordQueries(self):
2985 """Test that queryDimensionRecords works even when given a dataset
2986 constraint whose dimensions extend beyond the requested dimension
2987 element's.
2988 """
2989 registry = self.makeRegistry()
2990 self.loadData(registry, "base.yaml")
2991 self.loadData(registry, "datasets.yaml")
2992 # Query for physical_filter dimension records, using a dataset that
2993 # has both physical_filter and detector dimensions.
2994 records = registry.queryDimensionRecords(
2995 "physical_filter",
2996 datasets=["flat"],
2997 collections="imported_r",
2998 )
2999 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3000 # Trying to constrain by all dataset types is an error.
3001 with self.assertRaises(TypeError):
3002 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3004 def testSkyPixDatasetQueries(self):
3005 """Test that we can build queries involving skypix dimensions as long
3006 as a dataset type that uses those dimensions is included.
3007 """
3008 registry = self.makeRegistry()
3009 self.loadData(registry, "base.yaml")
3010 dataset_type = DatasetType(
3011 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3012 )
3013 registry.registerDatasetType(dataset_type)
3014 run = "r"
3015 registry.registerRun(run)
3016 # First try queries where there are no datasets; the concern is whether
3017 # we can even build and execute these queries without raising, even
3018 # when "doomed" query shortcuts are in play.
3019 self.assertFalse(
3020 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3021 )
3022 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3023 # Now add a dataset and see that we can get it back.
3024 htm7 = registry.dimensions.skypix["htm"][7].pixelization
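# htm7.universe() is a RangeSet covering the whole sky; take the first
# pixel index of its first range as an arbitrary valid htm7 ID.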
3025 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3026 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3027 self.assertEqual(
3028 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3029 {data_id},
3030 )
3031 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3033 def testDatasetIdFactory(self):
3034 """Simple test for DatasetIdFactory, mostly to catch potential changes
3035 in its API.
3036 """
3037 registry = self.makeRegistry()
3038 factory = registry.datasetIdFactory
3039 dataset_type = DatasetType(
3040 "datasetType",
3041 dimensions=["detector", "instrument"],
3042 universe=registry.dimensions,
3043 storageClass="int",
3044 )
3045 run = "run"
3046 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
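# UNIQUE should produce a random, version-4 UUID; the DATAID_TYPE and
# DATAID_TYPE_RUN modes should produce deterministic, name-based
# version-5 UUIDs derived from the dataset type and data ID (plus the
# run for DATAID_TYPE_RUN), so repeated calls with the same inputs
# reproduce the same ID.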
3048 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3049 self.assertIsInstance(datasetId, uuid.UUID)
3050 self.assertEqual(datasetId.version, 4)
3052 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3053 self.assertIsInstance(datasetId, uuid.UUID)
3054 self.assertEqual(datasetId.version, 5)
3056 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3057 self.assertIsInstance(datasetId, uuid.UUID)
3058 self.assertEqual(datasetId.version, 5)