Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 5%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTests"]
25from abc import ABC, abstractmethod
26from collections import defaultdict
27from datetime import datetime, timedelta
28import itertools
29import logging
30import os
31import re
32from typing import Iterator
33import unittest
34import uuid
36import astropy.time
37import sqlalchemy
38from typing import Optional, Type, Union, TYPE_CHECKING
40try:
41 import numpy as np
42except ImportError:
43 np = None
45import lsst.sphgeom
46from ...core import (
47 DataCoordinate,
48 DataCoordinateSet,
49 DatasetAssociation,
50 DatasetRef,
51 DatasetType,
52 DimensionGraph,
53 NamedValueSet,
54 StorageClass,
55 ddl,
56 Timespan,
57)
58from ..interfaces import DatasetIdGenEnum
59from ..summaries import CollectionSummary
60from .._collectionType import CollectionType
61from .._config import RegistryConfig
63from .._exceptions import (
64 ConflictingDefinitionError,
65 InconsistentDataIdError,
66 MissingCollectionError,
67 OrphanedRecordError,
68)
69from ..interfaces import ButlerAttributeExistsError
71if TYPE_CHECKING: 71 ↛ 72line 71 didn't jump to line 72, because the condition on line 71 was never true
72 from .._registry import Registry
75class RegistryTests(ABC):
76 """Generic tests for the `Registry` class that can be subclassed to
77 generate tests for different configurations.
78 """
80 collectionsManager: Optional[str] = None
81 """Name of the collections manager class, if subclass provides value for
82 this member then it overrides name specified in default configuration
83 (`str`).
84 """
86 datasetsManager: Optional[str] = None
87 """Name of the datasets manager class, if subclass provides value for
88 this member then it overrides name specified in default configuration
89 (`str`).
90 """
92 @classmethod
93 @abstractmethod
94 def getDataDir(cls) -> str:
95 """Return the root directory containing test data YAML files.
96 """
97 raise NotImplementedError()
99 def makeRegistryConfig(self) -> RegistryConfig:
100 """Create RegistryConfig used to create a registry.
102 This method should be called by a subclass from `makeRegistry`.
103 Returned instance will be pre-configured based on the values of class
104 members, and default-configured for all other parametrs. Subclasses
105 that need default configuration should just instantiate
106 `RegistryConfig` directly.
107 """
108 config = RegistryConfig()
109 if self.collectionsManager:
110 config["managers", "collections"] = self.collectionsManager
111 if self.datasetsManager:
112 config["managers", "datasets"] = self.datasetsManager
113 return config
115 @abstractmethod
116 def makeRegistry(self) -> Registry:
117 """Return the Registry instance to be tested.
118 """
119 raise NotImplementedError()
121 def loadData(self, registry: Registry, filename: str):
122 """Load registry test data from ``getDataDir/<filename>``,
123 which should be a YAML import/export file.
124 """
125 from ...transfers import YamlRepoImportBackend
126 with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
127 backend = YamlRepoImportBackend(stream, registry)
128 backend.register()
129 backend.load(datastore=None)
131 def checkQueryResults(self, results, expected):
132 """Check that a query results object contains expected values.
134 Parameters
135 ----------
136 results : `DataCoordinateQueryResults` or `DatasetQueryResults`
137 A lazy-evaluation query results object.
138 expected : `list`
139 A list of `DataCoordinate` o `DatasetRef` objects that should be
140 equal to results of the query, aside from ordering.
141 """
142 self.assertCountEqual(list(results), expected)
143 self.assertEqual(results.count(), len(expected))
144 if expected:
145 self.assertTrue(results.any())
146 else:
147 self.assertFalse(results.any())
149 def testOpaque(self):
150 """Tests for `Registry.registerOpaqueTable`,
151 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
152 `Registry.deleteOpaqueData`.
153 """
154 registry = self.makeRegistry()
155 table = "opaque_table_for_testing"
156 registry.registerOpaqueTable(
157 table,
158 spec=ddl.TableSpec(
159 fields=[
160 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
161 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
162 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
163 ],
164 )
165 )
166 rows = [
167 {"id": 1, "name": "one", "count": None},
168 {"id": 2, "name": "two", "count": 5},
169 {"id": 3, "name": "three", "count": 6},
170 ]
171 registry.insertOpaqueData(table, *rows)
172 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
173 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
174 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
175 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
176 self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
177 # Test very long IN clause which exceeds sqlite limit on number of
178 # parameters. SQLite says the limit is 32k but it looks like it is
179 # much higher.
180 self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
181 # Two IN clauses, each longer than 1k batch size, first with
182 # duplicates, second has matching elements in different batches (after
183 # sorting).
184 self.assertEqual(rows[0:2], list(registry.fetchOpaqueData(
185 table,
186 id=list(range(1000)) + list(range(100, 0, -1)),
187 name=["one"] + [f"q{i}" for i in range(2200)] + ["two"])))
188 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
189 registry.deleteOpaqueData(table, id=3)
190 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
191 registry.deleteOpaqueData(table)
192 self.assertEqual([], list(registry.fetchOpaqueData(table)))
194 def testDatasetType(self):
195 """Tests for `Registry.registerDatasetType` and
196 `Registry.getDatasetType`.
197 """
198 registry = self.makeRegistry()
199 # Check valid insert
200 datasetTypeName = "test"
201 storageClass = StorageClass("testDatasetType")
202 registry.storageClasses.registerStorageClass(storageClass)
203 dimensions = registry.dimensions.extract(("instrument", "visit"))
204 differentDimensions = registry.dimensions.extract(("instrument", "patch"))
205 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
206 # Inserting for the first time should return True
207 self.assertTrue(registry.registerDatasetType(inDatasetType))
208 outDatasetType1 = registry.getDatasetType(datasetTypeName)
209 self.assertEqual(outDatasetType1, inDatasetType)
211 # Re-inserting should work
212 self.assertFalse(registry.registerDatasetType(inDatasetType))
213 # Except when they are not identical
214 with self.assertRaises(ConflictingDefinitionError):
215 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
216 registry.registerDatasetType(nonIdenticalDatasetType)
218 # Template can be None
219 datasetTypeName = "testNoneTemplate"
220 storageClass = StorageClass("testDatasetType2")
221 registry.storageClasses.registerStorageClass(storageClass)
222 dimensions = registry.dimensions.extract(("instrument", "visit"))
223 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
224 registry.registerDatasetType(inDatasetType)
225 outDatasetType2 = registry.getDatasetType(datasetTypeName)
226 self.assertEqual(outDatasetType2, inDatasetType)
228 allTypes = set(registry.queryDatasetTypes())
229 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
231 def testDimensions(self):
232 """Tests for `Registry.insertDimensionData`,
233 `Registry.syncDimensionData`, and `Registry.expandDataId`.
234 """
235 registry = self.makeRegistry()
236 dimensionName = "instrument"
237 dimension = registry.dimensions[dimensionName]
238 dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
239 "class_name": "lsst.obs.base.Instrument"}
240 registry.insertDimensionData(dimensionName, dimensionValue)
241 # Inserting the same value twice should fail
242 with self.assertRaises(sqlalchemy.exc.IntegrityError):
243 registry.insertDimensionData(dimensionName, dimensionValue)
244 # expandDataId should retrieve the record we just inserted
245 self.assertEqual(
246 registry.expandDataId(
247 instrument="DummyCam",
248 graph=dimension.graph
249 ).records[dimensionName].toDict(),
250 dimensionValue
251 )
252 # expandDataId should raise if there is no record with the given ID.
253 with self.assertRaises(LookupError):
254 registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
255 # band doesn't have a table; insert should fail.
256 with self.assertRaises(TypeError):
257 registry.insertDimensionData("band", {"band": "i"})
258 dimensionName2 = "physical_filter"
259 dimension2 = registry.dimensions[dimensionName2]
260 dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
261 # Missing required dependency ("instrument") should fail
262 with self.assertRaises(KeyError):
263 registry.insertDimensionData(dimensionName2, dimensionValue2)
264 # Adding required dependency should fix the failure
265 dimensionValue2["instrument"] = "DummyCam"
266 registry.insertDimensionData(dimensionName2, dimensionValue2)
267 # expandDataId should retrieve the record we just inserted.
268 self.assertEqual(
269 registry.expandDataId(
270 instrument="DummyCam", physical_filter="DummyCam_i",
271 graph=dimension2.graph
272 ).records[dimensionName2].toDict(),
273 dimensionValue2
274 )
275 # Use syncDimensionData to insert a new record successfully.
276 dimensionName3 = "detector"
277 dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
278 "name_in_raft": "zero", "purpose": "SCIENCE"}
279 self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
280 # Sync that again. Note that one field ("raft") is NULL, and that
281 # should be okay.
282 self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
283 # Now try that sync with the same primary key but a different value.
284 # This should fail.
285 with self.assertRaises(ConflictingDefinitionError):
286 registry.syncDimensionData(
287 dimensionName3,
288 {"instrument": "DummyCam", "id": 1, "full_name": "one",
289 "name_in_raft": "four", "purpose": "SCIENCE"}
290 )
292 @unittest.skipIf(np is None, "numpy not available.")
293 def testNumpyDataId(self):
294 """Test that we can use a numpy int in a dataId."""
295 registry = self.makeRegistry()
296 dimensionEntries = [
297 ("instrument", {"instrument": "DummyCam"}),
298 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
299 # Using an np.int64 here fails unless Records.fromDict is also
300 # patched to look for numbers.Integral
301 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
302 ]
303 for args in dimensionEntries:
304 registry.insertDimensionData(*args)
306 # Try a normal integer and something that looks like an int but
307 # is not.
308 for visit_id in (42, np.int64(42)):
309 with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
310 expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
311 self.assertEqual(expanded["visit"], int(visit_id))
312 self.assertIsInstance(expanded["visit"], int)
314 def testDataIdRelationships(self):
315 """Test that `Registry.expandDataId` raises an exception when the given
316 keys are inconsistent.
317 """
318 registry = self.makeRegistry()
319 self.loadData(registry, "base.yaml")
320 # Insert a few more dimension records for the next test.
321 registry.insertDimensionData(
322 "exposure",
323 {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
324 )
325 registry.insertDimensionData(
326 "exposure",
327 {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
328 )
329 registry.insertDimensionData(
330 "visit_system",
331 {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
332 )
333 registry.insertDimensionData(
334 "visit",
335 {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
336 )
337 registry.insertDimensionData(
338 "visit_definition",
339 {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
340 )
341 with self.assertRaises(InconsistentDataIdError):
342 registry.expandDataId(
343 {"instrument": "Cam1", "visit": 1, "exposure": 2},
344 )
346 def testDataset(self):
347 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
348 and `Registry.removeDatasets`.
349 """
350 registry = self.makeRegistry()
351 self.loadData(registry, "base.yaml")
352 run = "test"
353 registry.registerRun(run)
354 datasetType = registry.getDatasetType("bias")
355 dataId = {"instrument": "Cam1", "detector": 2}
356 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
357 outRef = registry.getDataset(ref.id)
358 self.assertIsNotNone(ref.id)
359 self.assertEqual(ref, outRef)
360 with self.assertRaises(ConflictingDefinitionError):
361 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
362 registry.removeDatasets([ref])
363 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
365 def testFindDataset(self):
366 """Tests for `Registry.findDataset`.
367 """
368 registry = self.makeRegistry()
369 self.loadData(registry, "base.yaml")
370 run = "test"
371 datasetType = registry.getDatasetType("bias")
372 dataId = {"instrument": "Cam1", "detector": 4}
373 registry.registerRun(run)
374 inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
375 outputRef = registry.findDataset(datasetType, dataId, collections=[run])
376 self.assertEqual(outputRef, inputRef)
377 # Check that retrieval with invalid dataId raises
378 with self.assertRaises(LookupError):
379 dataId = {"instrument": "Cam1"} # no detector
380 registry.findDataset(datasetType, dataId, collections=run)
381 # Check that different dataIds match to different datasets
382 dataId1 = {"instrument": "Cam1", "detector": 1}
383 inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
384 dataId2 = {"instrument": "Cam1", "detector": 2}
385 inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
386 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
387 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
388 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
389 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
390 # Check that requesting a non-existing dataId returns None
391 nonExistingDataId = {"instrument": "Cam1", "detector": 3}
392 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
394 def testRemoveDatasetTypeSuccess(self):
395 """Test that Registry.removeDatasetType works when there are no
396 datasets of that type present.
397 """
398 registry = self.makeRegistry()
399 self.loadData(registry, "base.yaml")
400 registry.removeDatasetType("flat")
401 with self.assertRaises(KeyError):
402 registry.getDatasetType("flat")
404 def testRemoveDatasetTypeFailure(self):
405 """Test that Registry.removeDatasetType raises when there are datasets
406 of that type present or if the dataset type is for a component.
407 """
408 registry = self.makeRegistry()
409 self.loadData(registry, "base.yaml")
410 self.loadData(registry, "datasets.yaml")
411 with self.assertRaises(OrphanedRecordError):
412 registry.removeDatasetType("flat")
413 with self.assertRaises(ValueError):
414 registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
416 def testImportDatasetsUUID(self):
417 """Test for `Registry._importDatasets` with UUID dataset ID.
418 """
419 if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
420 self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
422 registry = self.makeRegistry()
423 self.loadData(registry, "base.yaml")
424 for run in range(6):
425 registry.registerRun(f"run{run}")
426 datasetTypeBias = registry.getDatasetType("bias")
427 datasetTypeFlat = registry.getDatasetType("flat")
428 dataIdBias1 = {"instrument": "Cam1", "detector": 1}
429 dataIdBias2 = {"instrument": "Cam1", "detector": 2}
430 dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
432 dataset_id = uuid.uuid4()
433 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
434 ref1, = registry._importDatasets([ref])
435 # UUID is used without change
436 self.assertEqual(ref.id, ref1.id)
438 # All different failure modes
439 refs = (
440 # Importing same DatasetRef with different dataset ID is an error
441 DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
442 # Same DatasetId but different DataId
443 DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
444 DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
445 # Same DatasetRef and DatasetId but different run
446 DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
447 )
448 for ref in refs:
449 with self.assertRaises(ConflictingDefinitionError):
450 registry._importDatasets([ref])
452 # Test for non-unique IDs, they can be re-imported multiple times.
453 for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
454 with self.subTest(idGenMode=idGenMode):
456 # Use integer dataset ID to force UUID calculation in _import
457 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
458 ref1, = registry._importDatasets([ref], idGenerationMode=idGenMode)
459 self.assertIsInstance(ref1.id, uuid.UUID)
460 self.assertEqual(ref1.id.version, 5)
462 # Importing it again is OK
463 ref2, = registry._importDatasets([ref1])
464 self.assertEqual(ref2.id, ref1.id)
466 # Cannot import to different run with the same ID
467 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
468 with self.assertRaises(ConflictingDefinitionError):
469 registry._importDatasets([ref])
471 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
472 if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
473 # Cannot import same DATAID_TYPE ref into a new run
474 with self.assertRaises(ConflictingDefinitionError):
475 ref2, = registry._importDatasets([ref], idGenerationMode=idGenMode)
476 else:
477 # DATAID_TYPE_RUN ref can be imported into a new run
478 ref2, = registry._importDatasets([ref], idGenerationMode=idGenMode)
480 def testImportDatasetsInt(self):
481 """Test for `Registry._importDatasets` with integer dataset ID.
482 """
483 if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManager"):
484 self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
486 registry = self.makeRegistry()
487 self.loadData(registry, "base.yaml")
488 run = "test"
489 registry.registerRun(run)
490 datasetTypeBias = registry.getDatasetType("bias")
491 datasetTypeFlat = registry.getDatasetType("flat")
492 dataIdBias1 = {"instrument": "Cam1", "detector": 1}
493 dataIdBias2 = {"instrument": "Cam1", "detector": 2}
494 dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}
495 dataset_id = 999999999
497 ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run=run)
498 ref1, = registry._importDatasets([ref])
499 # Should make new integer ID.
500 self.assertNotEqual(ref1.id, ref.id)
502 # Ingesting same dataId with different dataset ID is an error
503 ref2 = ref1.unresolved().resolved(dataset_id, run=run)
504 with self.assertRaises(ConflictingDefinitionError):
505 registry._importDatasets([ref2])
507 # Ingesting different dataId with the same dataset ID should work
508 ref3 = DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run=run)
509 ref4, = registry._importDatasets([ref3])
510 self.assertNotEqual(ref4.id, ref1.id)
512 ref3 = DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run=run)
513 ref4, = registry._importDatasets([ref3])
514 self.assertNotEqual(ref4.id, ref1.id)
516 def testDatasetTypeComponentQueries(self):
517 """Test component options when querying for dataset types.
518 """
519 registry = self.makeRegistry()
520 self.loadData(registry, "base.yaml")
521 self.loadData(registry, "datasets.yaml")
522 # Test querying for dataset types with different inputs.
523 # First query for all dataset types; components should only be included
524 # when components=True.
525 self.assertEqual(
526 {"bias", "flat"},
527 NamedValueSet(registry.queryDatasetTypes()).names
528 )
529 self.assertEqual(
530 {"bias", "flat"},
531 NamedValueSet(registry.queryDatasetTypes(components=False)).names
532 )
533 self.assertLess(
534 {"bias", "flat", "bias.wcs", "flat.photoCalib"},
535 NamedValueSet(registry.queryDatasetTypes(components=True)).names
536 )
537 # Use a pattern that can match either parent or components. Again,
538 # components are only returned if components=True.
539 self.assertEqual(
540 {"bias"},
541 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
542 )
543 self.assertEqual(
544 {"bias"},
545 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
546 )
547 self.assertLess(
548 {"bias", "bias.wcs"},
549 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
550 )
551 # This pattern matches only a component. In this case we also return
552 # that component dataset type if components=None.
553 self.assertEqual(
554 {"bias.wcs"},
555 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
556 )
557 self.assertEqual(
558 set(),
559 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
560 )
561 self.assertEqual(
562 {"bias.wcs"},
563 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
564 )
565 # Add a dataset type using a StorageClass that we'll then remove; check
566 # that this does not affect our ability to query for dataset types
567 # (though it will warn).
568 tempStorageClass = StorageClass(
569 name="TempStorageClass",
570 components={"data", registry.storageClasses.getStorageClass("StructuredDataDict")}
571 )
572 registry.storageClasses.registerStorageClass(tempStorageClass)
573 datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
574 universe=registry.dimensions)
575 registry.registerDatasetType(datasetType)
576 registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
577 datasetType._storageClass = None
578 del tempStorageClass
579 # Querying for all dataset types, including components, should include
580 # at least all non-component dataset types (and I don't want to
581 # enumerate all of the Exposure components for bias and flat here).
582 with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
583 everything = NamedValueSet(registry.queryDatasetTypes(components=True))
584 self.assertIn("TempStorageClass", cm.output[0])
585 self.assertLess({"bias", "flat", "temporary"}, everything.names)
586 # It should not include "temporary.columns", because we tried to remove
587 # the storage class that would tell it about that. So if the next line
588 # fails (i.e. "temporary.columns" _is_ in everything.names), it means
589 # this part of the test isn't doing anything, because the _unregister
590 # call about isn't simulating the real-life case we want it to
591 # simulate, in which different versions of daf_butler in entirely
592 # different Python processes interact with the same repo.
593 self.assertNotIn("temporary.data", everything.names)
594 # Query for dataset types that start with "temp". This should again
595 # not include the component, and also not fail.
596 with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
597 startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
598 self.assertIn("TempStorageClass", cm.output[0])
599 self.assertEqual({"temporary"}, startsWithTemp.names)
601 def testComponentLookups(self):
602 """Test searching for component datasets via their parents.
603 """
604 registry = self.makeRegistry()
605 self.loadData(registry, "base.yaml")
606 self.loadData(registry, "datasets.yaml")
607 # Test getting the child dataset type (which does still exist in the
608 # Registry), and check for consistency with
609 # DatasetRef.makeComponentRef.
610 collection = "imported_g"
611 parentType = registry.getDatasetType("bias")
612 childType = registry.getDatasetType("bias.wcs")
613 parentRefResolved = registry.findDataset(parentType, collections=collection,
614 instrument="Cam1", detector=1)
615 self.assertIsInstance(parentRefResolved, DatasetRef)
616 self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
617 # Search for a single dataset with findDataset.
618 childRef1 = registry.findDataset("bias.wcs", collections=collection,
619 dataId=parentRefResolved.dataId)
620 self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
621 # Search for detector data IDs constrained by component dataset
622 # existence with queryDataIds.
623 dataIds = registry.queryDataIds(
624 ["detector"],
625 datasets=["bias.wcs"],
626 collections=collection,
627 ).toSet()
628 self.assertEqual(
629 dataIds,
630 DataCoordinateSet(
631 {
632 DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
633 for d in (1, 2, 3)
634 },
635 parentType.dimensions,
636 )
637 )
638 # Search for multiple datasets of a single type with queryDatasets.
639 childRefs2 = set(registry.queryDatasets(
640 "bias.wcs",
641 collections=collection,
642 ))
643 self.assertEqual(
644 {ref.unresolved() for ref in childRefs2},
645 {DatasetRef(childType, dataId) for dataId in dataIds}
646 )
648 def testCollections(self):
649 """Tests for registry methods that manage collections.
650 """
651 registry = self.makeRegistry()
652 self.loadData(registry, "base.yaml")
653 self.loadData(registry, "datasets.yaml")
654 run1 = "imported_g"
655 run2 = "imported_r"
656 # Test setting a collection docstring after it has been created.
657 registry.setCollectionDocumentation(run1, "doc for run1")
658 self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
659 registry.setCollectionDocumentation(run1, None)
660 self.assertIsNone(registry.getCollectionDocumentation(run1))
661 datasetType = "bias"
662 # Find some datasets via their run's collection.
663 dataId1 = {"instrument": "Cam1", "detector": 1}
664 ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
665 self.assertIsNotNone(ref1)
666 dataId2 = {"instrument": "Cam1", "detector": 2}
667 ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
668 self.assertIsNotNone(ref2)
669 # Associate those into a new collection,then look for them there.
670 tag1 = "tag1"
671 registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
672 self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
673 registry.associate(tag1, [ref1, ref2])
674 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
675 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
676 # Disassociate one and verify that we can't it there anymore...
677 registry.disassociate(tag1, [ref1])
678 self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
679 # ...but we can still find ref2 in tag1, and ref1 in the run.
680 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
681 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
682 collections = set(registry.queryCollections())
683 self.assertEqual(collections, {run1, run2, tag1})
684 # Associate both refs into tag1 again; ref2 is already there, but that
685 # should be a harmless no-op.
686 registry.associate(tag1, [ref1, ref2])
687 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
688 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
689 # Get a different dataset (from a different run) that has the same
690 # dataset type and data ID as ref2.
691 ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
692 self.assertNotEqual(ref2, ref2b)
693 # Attempting to associate that into tag1 should be an error.
694 with self.assertRaises(ConflictingDefinitionError):
695 registry.associate(tag1, [ref2b])
696 # That error shouldn't have messed up what we had before.
697 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
698 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
699 # Attempt to associate the conflicting dataset again, this time with
700 # a dataset that isn't in the collection and won't cause a conflict.
701 # Should also fail without modifying anything.
702 dataId3 = {"instrument": "Cam1", "detector": 3}
703 ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
704 with self.assertRaises(ConflictingDefinitionError):
705 registry.associate(tag1, [ref3, ref2b])
706 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
707 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
708 self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
709 # Register a chained collection that searches [tag1, run2]
710 chain1 = "chain1"
711 registry.registerCollection(chain1, type=CollectionType.CHAINED)
712 self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
713 # Chained collection exists, but has no collections in it.
714 self.assertFalse(registry.getCollectionChain(chain1))
715 # If we query for all collections, we should get the chained collection
716 # only if we don't ask to flatten it (i.e. yield only its children).
717 self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
718 self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
719 # Attempt to set its child collections to something circular; that
720 # should fail.
721 with self.assertRaises(ValueError):
722 registry.setCollectionChain(chain1, [tag1, chain1])
723 # Add the child collections.
724 registry.setCollectionChain(chain1, [tag1, run2])
725 self.assertEqual(
726 list(registry.getCollectionChain(chain1)),
727 [tag1, run2]
728 )
729 # Searching for dataId1 or dataId2 in the chain should return ref1 and
730 # ref2, because both are in tag1.
731 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
732 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
733 # Now disassociate ref2 from tag1. The search (for bias) with
734 # dataId2 in chain1 should then:
735 # 1. not find it in tag1
736 # 2. find a different dataset in run2
737 registry.disassociate(tag1, [ref2])
738 ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
739 self.assertNotEqual(ref2b, ref2)
740 self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
741 # Define a new chain so we can test recursive chains.
742 chain2 = "chain2"
743 registry.registerCollection(chain2, type=CollectionType.CHAINED)
744 registry.setCollectionChain(chain2, [run2, chain1])
745 # Query for collections matching a regex.
746 self.assertCountEqual(
747 list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
748 ["imported_r", "imported_g"]
749 )
750 # Query for collections matching a regex or an explicit str.
751 self.assertCountEqual(
752 list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
753 ["imported_r", "imported_g", "chain1"]
754 )
755 # Search for bias with dataId1 should find it via tag1 in chain2,
756 # recursing, because is not in run1.
757 self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
758 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
759 # Search for bias with dataId2 should find it in run2 (ref2b).
760 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
761 # Search for a flat that is in run2. That should not be found
762 # at the front of chain2, because of the restriction to bias
763 # on run2 there, but it should be found in at the end of chain1.
764 dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
765 ref4 = registry.findDataset("flat", dataId4, collections=run2)
766 self.assertIsNotNone(ref4)
767 self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
768 # Deleting a collection that's part of a CHAINED collection is not
769 # allowed, and is exception-safe.
770 with self.assertRaises(Exception):
771 registry.removeCollection(run2)
772 self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
773 with self.assertRaises(Exception):
774 registry.removeCollection(chain1)
775 self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
776 # Actually remove chain2, test that it's gone by asking for its type.
777 registry.removeCollection(chain2)
778 with self.assertRaises(MissingCollectionError):
779 registry.getCollectionType(chain2)
780 # Actually remove run2 and chain1, which should work now.
781 registry.removeCollection(chain1)
782 registry.removeCollection(run2)
783 with self.assertRaises(MissingCollectionError):
784 registry.getCollectionType(run2)
785 with self.assertRaises(MissingCollectionError):
786 registry.getCollectionType(chain1)
787 # Remove tag1 as well, just to test that we can remove TAGGED
788 # collections.
789 registry.removeCollection(tag1)
790 with self.assertRaises(MissingCollectionError):
791 registry.getCollectionType(tag1)
793 def testCollectionChainFlatten(self):
794 """Test that Registry.setCollectionChain obeys its 'flatten' option.
795 """
796 registry = self.makeRegistry()
797 registry.registerCollection("inner", CollectionType.CHAINED)
798 registry.registerCollection("innermost", CollectionType.RUN)
799 registry.setCollectionChain("inner", ["innermost"])
800 registry.registerCollection("outer", CollectionType.CHAINED)
801 registry.setCollectionChain("outer", ["inner"], flatten=False)
802 self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
803 registry.setCollectionChain("outer", ["inner"], flatten=True)
804 self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
806 def testBasicTransaction(self):
807 """Test that all operations within a single transaction block are
808 rolled back if an exception propagates out of the block.
809 """
810 registry = self.makeRegistry()
811 storageClass = StorageClass("testDatasetType")
812 registry.storageClasses.registerStorageClass(storageClass)
813 with registry.transaction():
814 registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
815 with self.assertRaises(ValueError):
816 with registry.transaction():
817 registry.insertDimensionData("instrument", {"name": "Cam2"})
818 raise ValueError("Oops, something went wrong")
819 # Cam1 should exist
820 self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
821 # But Cam2 and Cam3 should both not exist
822 with self.assertRaises(LookupError):
823 registry.expandDataId(instrument="Cam2")
824 with self.assertRaises(LookupError):
825 registry.expandDataId(instrument="Cam3")
827 def testNestedTransaction(self):
828 """Test that operations within a transaction block are not rolled back
829 if an exception propagates out of an inner transaction block and is
830 then caught.
831 """
832 registry = self.makeRegistry()
833 dimension = registry.dimensions["instrument"]
834 dataId1 = {"instrument": "DummyCam"}
835 dataId2 = {"instrument": "DummyCam2"}
836 checkpointReached = False
837 with registry.transaction():
838 # This should be added and (ultimately) committed.
839 registry.insertDimensionData(dimension, dataId1)
840 with self.assertRaises(sqlalchemy.exc.IntegrityError):
841 with registry.transaction(savepoint=True):
842 # This does not conflict, and should succeed (but not
843 # be committed).
844 registry.insertDimensionData(dimension, dataId2)
845 checkpointReached = True
846 # This should conflict and raise, triggerring a rollback
847 # of the previous insertion within the same transaction
848 # context, but not the original insertion in the outer
849 # block.
850 registry.insertDimensionData(dimension, dataId1)
851 self.assertTrue(checkpointReached)
852 self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
853 with self.assertRaises(LookupError):
854 registry.expandDataId(dataId2, graph=dimension.graph)
856 def testInstrumentDimensions(self):
857 """Test queries involving only instrument dimensions, with no joins to
858 skymap."""
859 registry = self.makeRegistry()
861 # need a bunch of dimensions and datasets for test
862 registry.insertDimensionData(
863 "instrument",
864 dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
865 )
866 registry.insertDimensionData(
867 "physical_filter",
868 dict(instrument="DummyCam", name="dummy_r", band="r"),
869 dict(instrument="DummyCam", name="dummy_i", band="i"),
870 )
871 registry.insertDimensionData(
872 "detector",
873 *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
874 )
875 registry.insertDimensionData(
876 "visit_system",
877 dict(instrument="DummyCam", id=1, name="default"),
878 )
879 registry.insertDimensionData(
880 "visit",
881 dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
882 dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
883 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
884 )
885 registry.insertDimensionData(
886 "exposure",
887 dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
888 dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
889 dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
890 dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
891 dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
892 dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
893 )
894 registry.insertDimensionData(
895 "visit_definition",
896 dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
897 dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
898 dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
899 dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
900 dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
901 dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
902 )
903 # dataset types
904 run1 = "test1_r"
905 run2 = "test2_r"
906 tagged2 = "test2_t"
907 registry.registerRun(run1)
908 registry.registerRun(run2)
909 registry.registerCollection(tagged2)
910 storageClass = StorageClass("testDataset")
911 registry.storageClasses.registerStorageClass(storageClass)
912 rawType = DatasetType(name="RAW",
913 dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
914 storageClass=storageClass)
915 registry.registerDatasetType(rawType)
916 calexpType = DatasetType(name="CALEXP",
917 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
918 storageClass=storageClass)
919 registry.registerDatasetType(calexpType)
921 # add pre-existing datasets
922 for exposure in (100, 101, 110, 111):
923 for detector in (1, 2, 3):
924 # note that only 3 of 5 detectors have datasets
925 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
926 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
927 # exposures 100 and 101 appear in both run1 and tagged2.
928 # 100 has different datasets in the different collections
929 # 101 has the same dataset in both collections.
930 if exposure == 100:
931 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
932 if exposure in (100, 101):
933 registry.associate(tagged2, [ref])
934 # Add pre-existing datasets to tagged2.
935 for exposure in (200, 201):
936 for detector in (3, 4, 5):
937 # note that only 3 of 5 detectors have datasets
938 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
939 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
940 registry.associate(tagged2, [ref])
942 dimensions = DimensionGraph(
943 registry.dimensions,
944 dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
945 )
946 # Test that single dim string works as well as list of str
947 rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
948 rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
949 self.assertEqual(rows, rowsI)
950 # with empty expression
951 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
952 self.assertEqual(len(rows), 4*3) # 4 exposures times 3 detectors
953 for dataId in rows:
954 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
955 packer1 = registry.dimensions.makePacker("visit_detector", dataId)
956 packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
957 self.assertEqual(packer1.unpack(packer1.pack(dataId)),
958 DataCoordinate.standardize(dataId, graph=packer1.dimensions))
959 self.assertEqual(packer2.unpack(packer2.pack(dataId)),
960 DataCoordinate.standardize(dataId, graph=packer2.dimensions))
961 self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
962 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
963 (100, 101, 110, 111))
964 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
965 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
967 # second collection
968 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
969 self.assertEqual(len(rows), 4*3) # 4 exposures times 3 detectors
970 for dataId in rows:
971 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
972 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
973 (100, 101, 200, 201))
974 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
975 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
977 # with two input datasets
978 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
979 self.assertEqual(len(set(rows)), 6*3) # 6 exposures times 3 detectors; set needed to de-dupe
980 for dataId in rows:
981 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
982 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
983 (100, 101, 110, 111, 200, 201))
984 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
985 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
987 # limit to single visit
988 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
989 where="visit = 10", instrument="DummyCam").toSet()
990 self.assertEqual(len(rows), 2*3) # 2 exposures times 3 detectors
991 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
992 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
993 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
995 # more limiting expression, using link names instead of Table.column
996 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
997 where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
998 self.assertEqual(len(rows), 2*2) # 2 exposures times 2 detectors
999 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
1000 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
1001 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))
1003 # queryDataIds with only one of `datasets` and `collections` is an
1004 # error.
1005 with self.assertRaises(TypeError):
1006 registry.queryDataIds(dimensions, datasets=rawType)
1007 with self.assertRaises(TypeError):
1008 registry.queryDataIds(dimensions, collections=run1)
1010 # expression excludes everything
1011 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
1012 where="visit > 1000", instrument="DummyCam").toSet()
1013 self.assertEqual(len(rows), 0)
1015 # Selecting by physical_filter, this is not in the dimensions, but it
1016 # is a part of the full expression so it should work too.
1017 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
1018 where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
1019 self.assertEqual(len(rows), 2*3) # 2 exposures times 3 detectors
1020 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
1021 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
1022 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
1024 def testSkyMapDimensions(self):
1025 """Tests involving only skymap dimensions, no joins to instrument."""
1026 registry = self.makeRegistry()
1028 # need a bunch of dimensions and datasets for test, we want
1029 # "band" in the test so also have to add physical_filter
1030 # dimensions
1031 registry.insertDimensionData(
1032 "instrument",
1033 dict(instrument="DummyCam")
1034 )
1035 registry.insertDimensionData(
1036 "physical_filter",
1037 dict(instrument="DummyCam", name="dummy_r", band="r"),
1038 dict(instrument="DummyCam", name="dummy_i", band="i"),
1039 )
1040 registry.insertDimensionData(
1041 "skymap",
1042 dict(name="DummyMap", hash="sha!".encode("utf8"))
1043 )
1044 for tract in range(10):
1045 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
1046 registry.insertDimensionData(
1047 "patch",
1048 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
1049 for patch in range(10)]
1050 )
1052 # dataset types
1053 run = "test"
1054 registry.registerRun(run)
1055 storageClass = StorageClass("testDataset")
1056 registry.storageClasses.registerStorageClass(storageClass)
1057 calexpType = DatasetType(name="deepCoadd_calexp",
1058 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
1059 "band")),
1060 storageClass=storageClass)
1061 registry.registerDatasetType(calexpType)
1062 mergeType = DatasetType(name="deepCoadd_mergeDet",
1063 dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
1064 storageClass=storageClass)
1065 registry.registerDatasetType(mergeType)
1066 measType = DatasetType(name="deepCoadd_meas",
1067 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
1068 "band")),
1069 storageClass=storageClass)
1070 registry.registerDatasetType(measType)
1072 dimensions = DimensionGraph(
1073 registry.dimensions,
1074 dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
1075 | measType.dimensions.required)
1076 )
1078 # add pre-existing datasets
1079 for tract in (1, 3, 5):
1080 for patch in (2, 4, 6, 7):
1081 dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
1082 registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
1083 for aFilter in ("i", "r"):
1084 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
1085 registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
1087 # with empty expression
1088 rows = registry.queryDataIds(dimensions,
1089 datasets=[calexpType, mergeType], collections=run).toSet()
1090 self.assertEqual(len(rows), 3*4*2) # 4 tracts x 4 patches x 2 filters
1091 for dataId in rows:
1092 self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
1093 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
1094 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
1095 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))
1097 # limit to 2 tracts and 2 patches
1098 rows = registry.queryDataIds(dimensions,
1099 datasets=[calexpType, mergeType], collections=run,
1100 where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
1101 self.assertEqual(len(rows), 2*2*2) # 2 tracts x 2 patches x 2 filters
1102 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
1103 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
1104 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))
1106 # limit to single filter
1107 rows = registry.queryDataIds(dimensions,
1108 datasets=[calexpType, mergeType], collections=run,
1109 where="band = 'i'").toSet()
1110 self.assertEqual(len(rows), 3*4*1) # 4 tracts x 4 patches x 2 filters
1111 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
1112 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
1113 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))
1115 # expression excludes everything, specifying non-existing skymap is
1116 # not a fatal error, it's operator error
1117 rows = registry.queryDataIds(dimensions,
1118 datasets=[calexpType, mergeType], collections=run,
1119 where="skymap = 'Mars'").toSet()
1120 self.assertEqual(len(rows), 0)
1122 def testSpatialJoin(self):
1123 """Test queries that involve spatial overlap joins.
1124 """
1125 registry = self.makeRegistry()
1126 self.loadData(registry, "hsc-rc2-subset.yaml")
1128 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1129 # the TopologicalFamily they belong to. We'll relate all elements in
1130 # each family to all of the elements in each other family.
1131 families = defaultdict(set)
1132 # Dictionary of {element.name: {dataId: region}}.
1133 regions = {}
1134 for element in registry.dimensions.getDatabaseElements():
1135 if element.spatial is not None:
1136 families[element.spatial.name].add(element)
1137 regions[element.name] = {
1138 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1139 }
1141 # If this check fails, it's not necessarily a problem - it may just be
1142 # a reasonable change to the default dimension definitions - but the
1143 # test below depends on there being more than one family to do anything
1144 # useful.
1145 self.assertEqual(len(families), 2)
1147 # Overlap DatabaseDimensionElements with each other.
1148 for family1, family2 in itertools.combinations(families, 2):
1149 for element1, element2 in itertools.product(families[family1], families[family2]):
1150 graph = DimensionGraph.union(element1.graph, element2.graph)
1151 # Construct expected set of overlapping data IDs via a
1152 # brute-force comparison of the regions we've already fetched.
1153 expected = {
1154 DataCoordinate.standardize(
1155 {**dataId1.byName(), **dataId2.byName()},
1156 graph=graph
1157 )
1158 for (dataId1, region1), (dataId2, region2)
1159 in itertools.product(regions[element1.name].items(), regions[element2.name].items())
1160 if not region1.isDisjointFrom(region2)
1161 }
1162 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1163 queried = set(registry.queryDataIds(graph))
1164 self.assertEqual(expected, queried)
1166 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1167 commonSkyPix = registry.dimensions.commonSkyPix
1168 for elementName, regions in regions.items():
1169 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1170 expected = set()
1171 for dataId, region in regions.items():
1172 for begin, end in commonSkyPix.pixelization.envelope(region):
1173 expected.update(
1174 DataCoordinate.standardize(
1175 {commonSkyPix.name: index, **dataId.byName()},
1176 graph=graph
1177 )
1178 for index in range(begin, end)
1179 )
1180 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1181 queried = set(registry.queryDataIds(graph))
1182 self.assertEqual(expected, queried)
1184 def testAbstractQuery(self):
1185 """Test that we can run a query that just lists the known
1186 bands. This is tricky because band is
1187 backed by a query against physical_filter.
1188 """
1189 registry = self.makeRegistry()
1190 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1191 registry.insertDimensionData(
1192 "physical_filter",
1193 dict(instrument="DummyCam", name="dummy_i", band="i"),
1194 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1195 dict(instrument="DummyCam", name="dummy_r", band="r"),
1196 )
1197 rows = registry.queryDataIds(["band"]).toSet()
1198 self.assertCountEqual(
1199 rows,
1200 [DataCoordinate.standardize(band="i", universe=registry.dimensions),
1201 DataCoordinate.standardize(band="r", universe=registry.dimensions)]
1202 )
1204 def testAttributeManager(self):
1205 """Test basic functionality of attribute manager.
1206 """
1207 # number of attributes with schema versions in a fresh database,
1208 # 6 managers with 3 records per manager, plus config for dimensions
1209 VERSION_COUNT = 6 * 3 + 1
1211 registry = self.makeRegistry()
1212 attributes = registry._managers.attributes
1214 # check what get() returns for non-existing key
1215 self.assertIsNone(attributes.get("attr"))
1216 self.assertEqual(attributes.get("attr", ""), "")
1217 self.assertEqual(attributes.get("attr", "Value"), "Value")
1218 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1220 # cannot store empty key or value
1221 with self.assertRaises(ValueError):
1222 attributes.set("", "value")
1223 with self.assertRaises(ValueError):
1224 attributes.set("attr", "")
1226 # set value of non-existing key
1227 attributes.set("attr", "value")
1228 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1229 self.assertEqual(attributes.get("attr"), "value")
1231 # update value of existing key
1232 with self.assertRaises(ButlerAttributeExistsError):
1233 attributes.set("attr", "value2")
1235 attributes.set("attr", "value2", force=True)
1236 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1237 self.assertEqual(attributes.get("attr"), "value2")
1239 # delete existing key
1240 self.assertTrue(attributes.delete("attr"))
1241 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1243 # delete non-existing key
1244 self.assertFalse(attributes.delete("non-attr"))
1246 # store bunch of keys and get the list back
1247 data = [
1248 ("version.core", "1.2.3"),
1249 ("version.dimensions", "3.2.1"),
1250 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1251 ]
1252 for key, value in data:
1253 attributes.set(key, value)
1254 items = dict(attributes.items())
1255 for key, value in data:
1256 self.assertEqual(items[key], value)
1258 def testQueryDatasetsDeduplication(self):
1259 """Test that the findFirst option to queryDatasets selects datasets
1260 from collections in the order given".
1261 """
1262 registry = self.makeRegistry()
1263 self.loadData(registry, "base.yaml")
1264 self.loadData(registry, "datasets.yaml")
1265 self.assertCountEqual(
1266 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1267 [
1268 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1269 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1270 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1271 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1272 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1273 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1274 ]
1275 )
1276 self.assertCountEqual(
1277 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
1278 findFirst=True)),
1279 [
1280 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1281 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1282 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1283 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1284 ]
1285 )
1286 self.assertCountEqual(
1287 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
1288 findFirst=True)),
1289 [
1290 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1291 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1292 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1293 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1294 ]
1295 )
1297 def testQueryResults(self):
1298 """Test querying for data IDs and then manipulating the QueryResults
1299 object returned to perform other queries.
1300 """
1301 registry = self.makeRegistry()
1302 self.loadData(registry, "base.yaml")
1303 self.loadData(registry, "datasets.yaml")
1304 bias = registry.getDatasetType("bias")
1305 flat = registry.getDatasetType("flat")
1306 # Obtain expected results from methods other than those we're testing
1307 # here. That includes:
1308 # - the dimensions of the data IDs we want to query:
1309 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1310 # - the dimensions of some other data IDs we'll extract from that:
1311 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1312 # - the data IDs we expect to obtain from the first queries:
1313 expectedDataIds = DataCoordinateSet(
1314 {
1315 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
1316 universe=registry.dimensions)
1317 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1318 },
1319 graph=expectedGraph,
1320 hasFull=False,
1321 hasRecords=False,
1322 )
1323 # - the flat datasets we expect to find from those data IDs, in just
1324 # one collection (so deduplication is irrelevant):
1325 expectedFlats = [
1326 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
1327 collections="imported_r"),
1328 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
1329 collections="imported_r"),
1330 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
1331 collections="imported_r"),
1332 ]
1333 # - the data IDs we expect to extract from that:
1334 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1335 # - the bias datasets we expect to find from those data IDs, after we
1336 # subset-out the physical_filter dimension, both with duplicates:
1337 expectedAllBiases = [
1338 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1339 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1340 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1341 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1342 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1343 ]
1344 # - ...and without duplicates:
1345 expectedDeduplicatedBiases = [
1346 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1347 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1348 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1349 ]
1350 # Test against those expected results, using a "lazy" query for the
1351 # data IDs (which re-executes that query each time we use it to do
1352 # something new).
1353 dataIds = registry.queryDataIds(
1354 ["detector", "physical_filter"],
1355 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1356 instrument="Cam1",
1357 )
1358 self.assertEqual(dataIds.graph, expectedGraph)
1359 self.assertEqual(dataIds.toSet(), expectedDataIds)
1360 self.assertCountEqual(
1361 list(
1362 dataIds.findDatasets(
1363 flat,
1364 collections=["imported_r"],
1365 )
1366 ),
1367 expectedFlats,
1368 )
1369 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1370 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1371 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1372 self.assertCountEqual(
1373 list(
1374 subsetDataIds.findDatasets(
1375 bias,
1376 collections=["imported_r", "imported_g"],
1377 findFirst=False
1378 )
1379 ),
1380 expectedAllBiases
1381 )
1382 self.assertCountEqual(
1383 list(
1384 subsetDataIds.findDatasets(
1385 bias,
1386 collections=["imported_r", "imported_g"],
1387 findFirst=True
1388 )
1389 ), expectedDeduplicatedBiases
1390 )
1391 # Materialize the bias dataset queries (only) by putting the results
1392 # into temporary tables, then repeat those tests.
1393 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1394 findFirst=False).materialize() as biases:
1395 self.assertCountEqual(list(biases), expectedAllBiases)
1396 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1397 findFirst=True).materialize() as biases:
1398 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1399 # Materialize the data ID subset query, but not the dataset queries.
1400 with subsetDataIds.materialize() as subsetDataIds:
1401 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1402 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1403 self.assertCountEqual(
1404 list(
1405 subsetDataIds.findDatasets(
1406 bias,
1407 collections=["imported_r", "imported_g"],
1408 findFirst=False
1409 )
1410 ),
1411 expectedAllBiases
1412 )
1413 self.assertCountEqual(
1414 list(
1415 subsetDataIds.findDatasets(
1416 bias,
1417 collections=["imported_r", "imported_g"],
1418 findFirst=True
1419 )
1420 ), expectedDeduplicatedBiases
1421 )
1422 # Materialize the dataset queries, too.
1423 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1424 findFirst=False).materialize() as biases:
1425 self.assertCountEqual(list(biases), expectedAllBiases)
1426 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1427 findFirst=True).materialize() as biases:
1428 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1429 # Materialize the original query, but none of the follow-up queries.
1430 with dataIds.materialize() as dataIds:
1431 self.assertEqual(dataIds.graph, expectedGraph)
1432 self.assertEqual(dataIds.toSet(), expectedDataIds)
1433 self.assertCountEqual(
1434 list(
1435 dataIds.findDatasets(
1436 flat,
1437 collections=["imported_r"],
1438 )
1439 ),
1440 expectedFlats,
1441 )
1442 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1443 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1444 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1445 self.assertCountEqual(
1446 list(
1447 subsetDataIds.findDatasets(
1448 bias,
1449 collections=["imported_r", "imported_g"],
1450 findFirst=False
1451 )
1452 ),
1453 expectedAllBiases
1454 )
1455 self.assertCountEqual(
1456 list(
1457 subsetDataIds.findDatasets(
1458 bias,
1459 collections=["imported_r", "imported_g"],
1460 findFirst=True
1461 )
1462 ), expectedDeduplicatedBiases
1463 )
1464 # Materialize just the bias dataset queries.
1465 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1466 findFirst=False).materialize() as biases:
1467 self.assertCountEqual(list(biases), expectedAllBiases)
1468 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1469 findFirst=True).materialize() as biases:
1470 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1471 # Materialize the subset data ID query, but not the dataset
1472 # queries.
1473 with subsetDataIds.materialize() as subsetDataIds:
1474 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1475 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1476 self.assertCountEqual(
1477 list(
1478 subsetDataIds.findDatasets(
1479 bias,
1480 collections=["imported_r", "imported_g"],
1481 findFirst=False
1482 )
1483 ),
1484 expectedAllBiases
1485 )
1486 self.assertCountEqual(
1487 list(
1488 subsetDataIds.findDatasets(
1489 bias,
1490 collections=["imported_r", "imported_g"],
1491 findFirst=True
1492 )
1493 ), expectedDeduplicatedBiases
1494 )
1495 # Materialize the bias dataset queries, too, so now we're
1496 # materializing every single step.
1497 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1498 findFirst=False).materialize() as biases:
1499 self.assertCountEqual(list(biases), expectedAllBiases)
1500 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1501 findFirst=True).materialize() as biases:
1502 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1504 def testEmptyDimensionsQueries(self):
1505 """Test Query and QueryResults objects in the case where there are no
1506 dimensions.
1507 """
1508 # Set up test data: one dataset type, two runs, one dataset in each.
1509 registry = self.makeRegistry()
1510 self.loadData(registry, "base.yaml")
1511 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1512 registry.registerDatasetType(schema)
1513 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1514 run1 = "run1"
1515 run2 = "run2"
1516 registry.registerRun(run1)
1517 registry.registerRun(run2)
1518 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1519 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1520 # Query directly for both of the datasets, and each one, one at a time.
1521 self.checkQueryResults(
1522 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False),
1523 [dataset1, dataset2]
1524 )
1525 self.checkQueryResults(
1526 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1527 [dataset1],
1528 )
1529 self.checkQueryResults(
1530 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1531 [dataset2],
1532 )
1533 # Query for data IDs with no dimensions.
1534 dataIds = registry.queryDataIds([])
1535 self.checkQueryResults(dataIds, [dataId])
1536 # Use queried data IDs to find the datasets.
1537 self.checkQueryResults(
1538 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1539 [dataset1, dataset2],
1540 )
1541 self.checkQueryResults(
1542 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1543 [dataset1],
1544 )
1545 self.checkQueryResults(
1546 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1547 [dataset2],
1548 )
1549 # Now materialize the data ID query results and repeat those tests.
1550 with dataIds.materialize() as dataIds:
1551 self.checkQueryResults(dataIds, [dataId])
1552 self.checkQueryResults(
1553 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1554 [dataset1],
1555 )
1556 self.checkQueryResults(
1557 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1558 [dataset2],
1559 )
1560 # Query for non-empty data IDs, then subset that to get the empty one.
1561 # Repeat the above tests starting from that.
1562 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1563 self.checkQueryResults(dataIds, [dataId])
1564 self.checkQueryResults(
1565 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1566 [dataset1, dataset2],
1567 )
1568 self.checkQueryResults(
1569 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1570 [dataset1],
1571 )
1572 self.checkQueryResults(
1573 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1574 [dataset2],
1575 )
1576 with dataIds.materialize() as dataIds:
1577 self.checkQueryResults(dataIds, [dataId])
1578 self.checkQueryResults(
1579 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1580 [dataset1, dataset2],
1581 )
1582 self.checkQueryResults(
1583 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1584 [dataset1],
1585 )
1586 self.checkQueryResults(
1587 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1588 [dataset2],
1589 )
1590 # Query for non-empty data IDs, then materialize, then subset to get
1591 # the empty one. Repeat again.
1592 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1593 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1594 self.checkQueryResults(dataIds, [dataId])
1595 self.checkQueryResults(
1596 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1597 [dataset1, dataset2],
1598 )
1599 self.checkQueryResults(
1600 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1601 [dataset1],
1602 )
1603 self.checkQueryResults(
1604 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1605 [dataset2],
1606 )
1607 with dataIds.materialize() as dataIds:
1608 self.checkQueryResults(
1609 dataIds,
1610 [dataId]
1611 )
1612 self.checkQueryResults(
1613 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1614 [dataset1, dataset2],
1615 )
1616 self.checkQueryResults(
1617 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1618 [dataset1],
1619 )
1620 self.checkQueryResults(
1621 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1622 [dataset2],
1623 )
1625 def testDimensionDataModifications(self):
1626 """Test that modifying dimension records via:
1627 syncDimensionData(..., update=True) and
1628 insertDimensionData(..., replace=True) works as expected, even in the
1629 presence of datasets using those dimensions and spatial overlap
1630 relationships.
1631 """
1633 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1634 """Unpack a sphgeom.RangeSet into the integers it contains.
1635 """
1636 for begin, end in ranges:
1637 yield from range(begin, end)
1639 def range_set_hull(
1640 ranges: lsst.sphgeom.RangeSet,
1641 pixelization: lsst.sphgeom.HtmPixelization,
1642 ) -> lsst.sphgeom.ConvexPolygon:
1643 """Create a ConvexPolygon hull of the region defined by a set of
1644 HTM pixelization index ranges.
1645 """
1646 points = []
1647 for index in unpack_range_set(ranges):
1648 points.extend(pixelization.triangle(index).getVertices())
1649 return lsst.sphgeom.ConvexPolygon(points)
1651 # Use HTM to set up an initial parent region (one arbitrary trixel)
1652 # and four child regions (the trixels within the parent at the next
1653 # level. We'll use the parent as a tract/visit region and the children
1654 # as its patch/visit_detector regions.
1655 registry = self.makeRegistry()
1656 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1657 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1658 index = 12288
1659 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1660 assert htm6.universe().contains(child_ranges_small)
1661 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1662 parent_region_small = lsst.sphgeom.ConvexPolygon(
1663 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1664 )
1665 assert all(parent_region_small.contains(c) for c in child_regions_small)
1666 # Make a larger version of each child region, defined to be the set of
1667 # htm6 trixels that overlap the original's bounding circle. Make a new
1668 # parent that's the convex hull of the new children.
1669 child_regions_large = [
1670 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6)
1671 for c in child_regions_small
1672 ]
1673 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1674 parent_region_large = lsst.sphgeom.ConvexPolygon(
1675 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1676 )
1677 assert all(parent_region_large.contains(c) for c in child_regions_large)
1678 assert parent_region_large.contains(parent_region_small)
1679 assert not parent_region_small.contains(parent_region_large)
1680 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1681 # Find some commonSkyPix indices that overlap the large regions but not
1682 # overlap the small regions. We use commonSkyPix here to make sure the
1683 # real tests later involve what's in the database, not just post-query
1684 # region filtering.
1685 child_difference_indices = []
1686 for large, small in zip(child_regions_large, child_regions_small):
1687 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1688 assert difference, "if this is empty, we can't test anything useful with these regions"
1689 assert all(
1690 not commonSkyPix.triangle(d).isDisjointFrom(large)
1691 and commonSkyPix.triangle(d).isDisjointFrom(small)
1692 for d in difference
1693 )
1694 child_difference_indices.append(difference)
1695 parent_difference_indices = list(
1696 unpack_range_set(
1697 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1698 )
1699 )
1700 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1701 assert all(
1702 (
1703 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1704 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1705 )
1706 for d in parent_difference_indices
1707 )
1708 # Now that we've finally got those regions, we'll insert the large ones
1709 # as tract/patch dimension records.
1710 skymap_name = "testing_v1"
1711 registry.insertDimensionData(
1712 "skymap", {
1713 "name": skymap_name,
1714 "hash": bytes([42]),
1715 "tract_max": 1,
1716 "patch_nx_max": 2,
1717 "patch_ny_max": 2,
1718 }
1719 )
1720 registry.insertDimensionData(
1721 "tract",
1722 {"skymap": skymap_name, "id": 0, "region": parent_region_large}
1723 )
1724 registry.insertDimensionData(
1725 "patch",
1726 *[{
1727 "skymap": skymap_name,
1728 "tract": 0,
1729 "id": n,
1730 "cell_x": n % 2,
1731 "cell_y": n // 2,
1732 "region": c
1733 } for n, c in enumerate(child_regions_large)]
1734 )
1735 # Add at dataset that uses these dimensions to make sure that modifying
1736 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1737 # implement insert with replace=True as delete-then-insert).
1738 dataset_type = DatasetType(
1739 "coadd",
1740 dimensions=["tract", "patch"],
1741 universe=registry.dimensions,
1742 storageClass="Exposure",
1743 )
1744 registry.registerDatasetType(dataset_type)
1745 registry.registerCollection("the_run", CollectionType.RUN)
1746 registry.insertDatasets(
1747 dataset_type,
1748 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1749 run="the_run",
1750 )
1751 # Query for tracts and patches that overlap some "difference" htm9
1752 # pixels; there should be overlaps, because the database has
1753 # the "large" suite of regions.
1754 self.assertEqual(
1755 {0},
1756 {
1757 data_id["tract"] for data_id in registry.queryDataIds(
1758 ["tract"],
1759 skymap=skymap_name,
1760 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1761 )
1762 }
1763 )
1764 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1765 self.assertIn(
1766 patch_id,
1767 {
1768 data_id["patch"] for data_id in registry.queryDataIds(
1769 ["patch"],
1770 skymap=skymap_name,
1771 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1772 )
1773 }
1774 )
1775 # Use sync to update the tract region and insert to update the patch
1776 # regions, to the "small" suite.
1777 updated = registry.syncDimensionData(
1778 "tract",
1779 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1780 update=True,
1781 )
1782 self.assertEqual(updated, {"region": parent_region_large})
1783 registry.insertDimensionData(
1784 "patch",
1785 *[{
1786 "skymap": skymap_name,
1787 "tract": 0,
1788 "id": n,
1789 "cell_x": n % 2,
1790 "cell_y": n // 2,
1791 "region": c
1792 } for n, c in enumerate(child_regions_small)],
1793 replace=True
1794 )
1795 # Query again; there now should be no such overlaps, because the
1796 # database has the "small" suite of regions.
1797 self.assertFalse(
1798 set(
1799 registry.queryDataIds(
1800 ["tract"],
1801 skymap=skymap_name,
1802 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1803 )
1804 )
1805 )
1806 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1807 self.assertNotIn(
1808 patch_id,
1809 {
1810 data_id["patch"] for data_id in registry.queryDataIds(
1811 ["patch"],
1812 skymap=skymap_name,
1813 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1814 )
1815 }
1816 )
1817 # Update back to the large regions and query one more time.
1818 updated = registry.syncDimensionData(
1819 "tract",
1820 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1821 update=True,
1822 )
1823 self.assertEqual(updated, {"region": parent_region_small})
1824 registry.insertDimensionData(
1825 "patch",
1826 *[{
1827 "skymap": skymap_name,
1828 "tract": 0,
1829 "id": n,
1830 "cell_x": n % 2,
1831 "cell_y": n // 2,
1832 "region": c
1833 } for n, c in enumerate(child_regions_large)],
1834 replace=True
1835 )
1836 self.assertEqual(
1837 {0},
1838 {
1839 data_id["tract"] for data_id in registry.queryDataIds(
1840 ["tract"],
1841 skymap=skymap_name,
1842 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1843 )
1844 }
1845 )
1846 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1847 self.assertIn(
1848 patch_id,
1849 {
1850 data_id["patch"] for data_id in registry.queryDataIds(
1851 ["patch"],
1852 skymap=skymap_name,
1853 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1854 )
1855 }
1856 )
1858 def testCalibrationCollections(self):
1859 """Test operations on `~CollectionType.CALIBRATION` collections,
1860 including `Registry.certify`, `Registry.decertify`, and
1861 `Registry.findDataset`.
1862 """
1863 # Setup - make a Registry, fill it with some datasets in
1864 # non-calibration collections.
1865 registry = self.makeRegistry()
1866 self.loadData(registry, "base.yaml")
1867 self.loadData(registry, "datasets.yaml")
1868 # Set up some timestamps.
1869 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1870 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1871 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1872 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1873 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1874 allTimespans = [
1875 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1876 ]
1877 # Get references to some datasets.
1878 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1879 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1880 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1881 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1882 # Register the main calibration collection we'll be working with.
1883 collection = "Cam1/calibs/default"
1884 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1885 # Cannot associate into a calibration collection (no timespan).
1886 with self.assertRaises(TypeError):
1887 registry.associate(collection, [bias2a])
1888 # Certify 2a dataset with [t2, t4) validity.
1889 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1890 # We should not be able to certify 2b with anything overlapping that
1891 # window.
1892 with self.assertRaises(ConflictingDefinitionError):
1893 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1894 with self.assertRaises(ConflictingDefinitionError):
1895 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1896 with self.assertRaises(ConflictingDefinitionError):
1897 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1898 with self.assertRaises(ConflictingDefinitionError):
1899 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1900 with self.assertRaises(ConflictingDefinitionError):
1901 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1902 with self.assertRaises(ConflictingDefinitionError):
1903 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1904 with self.assertRaises(ConflictingDefinitionError):
1905 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1906 with self.assertRaises(ConflictingDefinitionError):
1907 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1908 # We should be able to certify 3a with a range overlapping that window,
1909 # because it's for a different detector.
1910 # We'll certify 3a over [t1, t3).
1911 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1912 # Now we'll certify 2b and 3b together over [t4, ∞).
1913 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1915 # Fetch all associations and check that they are what we expect.
1916 self.assertCountEqual(
1917 list(
1918 registry.queryDatasetAssociations(
1919 "bias",
1920 collections=[collection, "imported_g", "imported_r"],
1921 )
1922 ),
1923 [
1924 DatasetAssociation(
1925 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1926 collection="imported_g",
1927 timespan=None,
1928 ),
1929 DatasetAssociation(
1930 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1931 collection="imported_r",
1932 timespan=None,
1933 ),
1934 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1935 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1936 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1937 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1938 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1939 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1940 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1941 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1942 ]
1943 )
1945 class Ambiguous:
1946 """Tag class to denote lookups that are expected to be ambiguous.
1947 """
1948 pass
1950 def assertLookup(detector: int, timespan: Timespan,
1951 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1952 """Local function that asserts that a bias lookup returns the given
1953 expected result.
1954 """
1955 if expected is Ambiguous:
1956 with self.assertRaises(RuntimeError):
1957 registry.findDataset("bias", collections=collection, instrument="Cam1",
1958 detector=detector, timespan=timespan)
1959 else:
1960 self.assertEqual(
1961 expected,
1962 registry.findDataset("bias", collections=collection, instrument="Cam1",
1963 detector=detector, timespan=timespan)
1964 )
1966 # Systematically test lookups against expected results.
1967 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1968 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1969 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1970 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1971 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1972 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1973 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1974 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1975 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1976 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1977 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1978 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1979 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1980 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1981 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1982 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1983 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1984 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1985 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1986 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1987 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1988 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1989 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1990 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1991 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1992 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1993 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1994 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1995 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1996 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1997 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
1998 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1999 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2000 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2001 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2002 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2003 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2004 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2005 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2006 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2007 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2008 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2010 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2011 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2012 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2013 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2014 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2015 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2016 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2017 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2018 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2019 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2020 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2021 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2022 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2023 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2024 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2025 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2026 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2027 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2028 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2029 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2030 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2031 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2032 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2033 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2034 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2035 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2036 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2037 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2038 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2039 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2040 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2041 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2042 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2043 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2044 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2045 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2046 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2047 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2048 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2049 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2050 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2051 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2052 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2053 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2054 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2055 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2057 # Decertify everything, this time with explicit data IDs, then check
2058 # that no lookups succeed.
2059 registry.decertify(
2060 collection, "bias", Timespan(None, None),
2061 dataIds=[
2062 dict(instrument="Cam1", detector=2),
2063 dict(instrument="Cam1", detector=3),
2064 ]
2065 )
2066 for detector in (2, 3):
2067 for timespan in allTimespans:
2068 assertLookup(detector=detector, timespan=timespan, expected=None)
2069 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2070 # those.
2071 registry.certify(collection, [bias2a, bias3a], Timespan(None, None),)
2072 for timespan in allTimespans:
2073 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2074 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2075 # Decertify just bias2 over [t2, t4).
2076 # This should split a single certification row into two (and leave the
2077 # other existing row, for bias3a, alone).
2078 registry.decertify(collection, "bias", Timespan(t2, t4),
2079 dataIds=[dict(instrument="Cam1", detector=2)])
2080 for timespan in allTimespans:
2081 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2082 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2083 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2084 if overlapsBefore and overlapsAfter:
2085 expected = Ambiguous
2086 elif overlapsBefore or overlapsAfter:
2087 expected = bias2a
2088 else:
2089 expected = None
2090 assertLookup(detector=2, timespan=timespan, expected=expected)
2092 def testSkipCalibs(self):
2093 """Test how queries handle skipping of calibration collections.
2094 """
2095 registry = self.makeRegistry()
2096 self.loadData(registry, "base.yaml")
2097 self.loadData(registry, "datasets.yaml")
2099 coll_calib = "Cam1/calibs/default"
2100 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2102 # Add all biases to the calibration collection.
2103 # Without this, the logic that prunes dataset subqueries based on
2104 # datasetType-collection summary information will fire before the logic
2105 # we want to test below. This is a good thing (it avoids the dreaded
2106 # NotImplementedError a bit more often) everywhere but here.
2107 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2109 coll_list = [coll_calib, "imported_g", "imported_r"]
2110 chain = "Cam1/chain"
2111 registry.registerCollection(chain, type=CollectionType.CHAINED)
2112 registry.setCollectionChain(chain, coll_list)
2114 # explicit list will raise if findFirst=True or there are temporal
2115 # dimensions
2116 with self.assertRaises(NotImplementedError):
2117 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2118 with self.assertRaises(NotImplementedError):
2119 registry.queryDataIds(["instrument", "detector", "exposure"], datasets="bias",
2120 collections=coll_list)
2122 # chain will skip
2123 datasets = list(registry.queryDatasets("bias", collections=chain))
2124 self.assertGreater(len(datasets), 0)
2126 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias",
2127 collections=chain))
2128 self.assertGreater(len(dataIds), 0)
2130 # glob will skip too
2131 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2132 self.assertGreater(len(datasets), 0)
2134 # regular expression will skip too
2135 pattern = re.compile(".*")
2136 datasets = list(registry.queryDatasets("bias", collections=pattern))
2137 self.assertGreater(len(datasets), 0)
2139 # ellipsis should work as usual
2140 datasets = list(registry.queryDatasets("bias", collections=...))
2141 self.assertGreater(len(datasets), 0)
2143 # few tests with findFirst
2144 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2145 self.assertGreater(len(datasets), 0)
2147 def testIngestTimeQuery(self):
2149 registry = self.makeRegistry()
2150 self.loadData(registry, "base.yaml")
2151 dt0 = datetime.utcnow()
2152 self.loadData(registry, "datasets.yaml")
2153 dt1 = datetime.utcnow()
2155 datasets = list(registry.queryDatasets(..., collections=...))
2156 len0 = len(datasets)
2157 self.assertGreater(len0, 0)
2159 where = "ingest_date > T'2000-01-01'"
2160 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2161 len1 = len(datasets)
2162 self.assertEqual(len0, len1)
2164 # no one will ever use this piece of software in 30 years
2165 where = "ingest_date > T'2050-01-01'"
2166 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2167 len2 = len(datasets)
2168 self.assertEqual(len2, 0)
2170 # Check more exact timing to make sure there is no 37 seconds offset
2171 # (after fixing DM-30124). SQLite time precision is 1 second, make
2172 # sure that we don't test with higher precision.
2173 tests = [
2174 # format: (timestamp, operator, expected_len)
2175 (dt0 - timedelta(seconds=1), ">", len0),
2176 (dt0 - timedelta(seconds=1), "<", 0),
2177 (dt1 + timedelta(seconds=1), "<", len0),
2178 (dt1 + timedelta(seconds=1), ">", 0),
2179 ]
2180 for dt, op, expect_len in tests:
2181 dt_str = dt.isoformat(sep=" ")
2183 where = f"ingest_date {op} T'{dt_str}'"
2184 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2185 self.assertEqual(len(datasets), expect_len)
2187 # same with bind using datetime or astropy Time
2188 where = f"ingest_date {op} ingest_time"
2189 datasets = list(registry.queryDatasets(..., collections=..., where=where,
2190 bind={"ingest_time": dt}))
2191 self.assertEqual(len(datasets), expect_len)
2193 dt_astropy = astropy.time.Time(dt, format="datetime")
2194 datasets = list(registry.queryDatasets(..., collections=..., where=where,
2195 bind={"ingest_time": dt_astropy}))
2196 self.assertEqual(len(datasets), expect_len)
2198 def testTimespanQueries(self):
2199 """Test query expressions involving timespans.
2200 """
2201 registry = self.makeRegistry()
2202 self.loadData(registry, "hsc-rc2-subset.yaml")
2203 # All exposures in the database; mapping from ID to timespan.
2204 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2205 # Just those IDs, sorted (which is also temporal sorting, because HSC
2206 # exposure IDs are monotonically increasing).
2207 ids = sorted(visits.keys())
2208 self.assertGreater(len(ids), 20)
2209 # Pick some quasi-random indexes into `ids` to play with.
2210 i1 = int(len(ids)*0.1)
2211 i2 = int(len(ids)*0.3)
2212 i3 = int(len(ids)*0.6)
2213 i4 = int(len(ids)*0.8)
2214 # Extract some times from those: just before the beginning of i1 (which
2215 # should be after the end of the exposure before), exactly the
2216 # beginning of i2, just after the beginning of i3 (and before its end),
2217 # and the exact end of i4.
2218 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2219 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2220 t2 = visits[ids[i2]].begin
2221 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2222 self.assertLess(t3, visits[ids[i3]].end)
2223 t4 = visits[ids[i4]].end
2224 # Make sure those are actually in order.
2225 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2227 bind = {
2228 "t1": t1,
2229 "t2": t2,
2230 "t3": t3,
2231 "t4": t4,
2232 "ts23": Timespan(t2, t3),
2233 }
2235 def query(where):
2236 """Helper function that queries for visit data IDs and returns
2237 results as a sorted, deduplicated list of visit IDs.
2238 """
2239 return sorted(
2240 {dataId["visit"] for dataId in registry.queryDataIds("visit",
2241 instrument="HSC",
2242 bind=bind,
2243 where=where)}
2244 )
2246 # Try a bunch of timespan queries, mixing up the bounds themselves,
2247 # where they appear in the expression, and how we get the timespan into
2248 # the expression.
2250 # t1 is before the start of i1, so this should not include i1.
2251 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2252 # t2 is exactly at the start of i2, but ends are exclusive, so these
2253 # should not include i2.
2254 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2255 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2256 # t3 is in the middle of i3, so this should include i3.
2257 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
2258 # This one should not include t3 by the same reasoning.
2259 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
2260 # t4 is exactly at the end of i4, so this should include i4.
2261 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2262 # i4's upper bound of t4 is exclusive so this should not include t4.
2263 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
2265 # Now some timespan vs. time scalar queries.
2266 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2267 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2268 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
2269 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
2270 self.assertEqual(ids[i3:i3+1], query("visit.timespan OVERLAPS t3"))
2271 self.assertEqual(ids[i3:i3+1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2273 # Empty timespans should not overlap anything.
2274 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2276 def testCollectionSummaries(self):
2277 """Test recording and retrieval of collection summaries.
2278 """
2279 self.maxDiff = None
2280 registry = self.makeRegistry()
2281 # Importing datasets from yaml should go through the code path where
2282 # we update collection summaries as we insert datasets.
2283 self.loadData(registry, "base.yaml")
2284 self.loadData(registry, "datasets.yaml")
2285 flat = registry.getDatasetType("flat")
2286 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
2287 expected1.datasetTypes.add(registry.getDatasetType("bias"))
2288 expected1.datasetTypes.add(flat)
2289 expected1.dimensions.update_extract(
2290 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
2291 )
2292 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2293 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2294 # Create a chained collection with both of the imported runs; the
2295 # summary should be the same, because it's a union with itself.
2296 chain = "chain"
2297 registry.registerCollection(chain, CollectionType.CHAINED)
2298 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2299 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2300 # Associate flats only into a tagged collection and a calibration
2301 # collection to check summaries of those.
2302 tag = "tag"
2303 registry.registerCollection(tag, CollectionType.TAGGED)
2304 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2305 calibs = "calibs"
2306 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2307 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
2308 timespan=Timespan(None, None))
2309 expected2 = expected1.copy()
2310 expected2.datasetTypes.discard("bias")
2311 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2312 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2313 # Explicitly calling Registry.refresh() should load those same
2314 # summaries, via a totally different code path.
2315 registry.refresh()
2316 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2317 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2318 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2319 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2321 def testUnrelatedDimensionQueries(self):
2322 """Test that WHERE expressions in queries can reference dimensions that
2323 are not in the result set.
2324 """
2325 registry = self.makeRegistry()
2326 # There is no data to back this query, but it should still return
2327 # zero records instead of raising.
2328 self.assertFalse(
2329 set(registry.queryDataIds(["visit", "detector"],
2330 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
2331 )
2333 def testBindInQueryDatasets(self):
2334 """Test that the bind parameter is correctly forwarded in
2335 queryDatasets recursion.
2336 """
2337 registry = self.makeRegistry()
2338 # Importing datasets from yaml should go through the code path where
2339 # we update collection summaries as we insert datasets.
2340 self.loadData(registry, "base.yaml")
2341 self.loadData(registry, "datasets.yaml")
2342 self.assertEqual(
2343 set(registry.queryDatasets("flat", band="r", collections=...)),
2344 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2345 )
2347 def testQueryResultSummaries(self):
2348 """Test summary methods like `count`, `any`, and `explain_no_results`
2349 on `DataCoordinateQueryResults` and `DatasetQueryResults`
2350 """
2351 registry = self.makeRegistry()
2352 self.loadData(registry, "base.yaml")
2353 self.loadData(registry, "datasets.yaml")
2354 self.loadData(registry, "spatial.yaml")
2355 # Default test dataset has two collections, each with both flats and
2356 # biases. Add a new collection with only biases.
2357 registry.registerCollection("biases", CollectionType.TAGGED)
2358 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2359 # First query yields two results, and involves no postprocessing.
2360 query1 = registry.queryDataIds(["physical_filter"], band="r")
2361 self.assertTrue(query1.any(execute=False, exact=False))
2362 self.assertTrue(query1.any(execute=True, exact=False))
2363 self.assertTrue(query1.any(execute=True, exact=True))
2364 self.assertEqual(query1.count(exact=False), 2)
2365 self.assertEqual(query1.count(exact=True), 2)
2366 self.assertFalse(list(query1.explain_no_results()))
2367 # Second query should yield no results, but this isn't detectable
2368 # unless we actually run a query.
2369 query2 = registry.queryDataIds(["physical_filter"], band="h")
2370 self.assertTrue(query2.any(execute=False, exact=False))
2371 self.assertFalse(query2.any(execute=True, exact=False))
2372 self.assertFalse(query2.any(execute=True, exact=True))
2373 self.assertEqual(query2.count(exact=False), 0)
2374 self.assertEqual(query2.count(exact=True), 0)
2375 self.assertFalse(list(query2.explain_no_results()))
2376 # These queries yield no results due to various problems that can be
2377 # spotted prior to execution, yielding helpful diagnostics.
2378 for query, snippets in [
2379 (
2380 # Dataset type name doesn't match any existing dataset types.
2381 registry.queryDatasets("nonexistent", collections=...),
2382 ["nonexistent"],
2383 ),
2384 (
2385 # Dataset type object isn't registered.
2386 registry.queryDatasets(
2387 DatasetType(
2388 "nonexistent",
2389 dimensions=["instrument"],
2390 universe=registry.dimensions,
2391 storageClass="Image",
2392 ),
2393 collections=...
2394 ),
2395 ["nonexistent"],
2396 ),
2397 (
2398 # No datasets of this type in this collection.
2399 registry.queryDatasets("flat", collections=["biases"]),
2400 ["flat", "biases"],
2401 ),
2402 (
2403 # No collections matching at all.
2404 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2405 ["potato"],
2406 ),
2407 ]:
2409 self.assertFalse(query.any(execute=False, exact=False))
2410 self.assertFalse(query.any(execute=True, exact=False))
2411 self.assertFalse(query.any(execute=True, exact=True))
2412 self.assertEqual(query.count(exact=False), 0)
2413 self.assertEqual(query.count(exact=True), 0)
2414 messages = list(query.explain_no_results())
2415 self.assertTrue(messages)
2416 # Want all expected snippets to appear in at least one message.
2417 self.assertTrue(
2418 any(
2419 all(snippet in message for snippet in snippets)
2420 for message in query.explain_no_results()
2421 ),
2422 messages
2423 )
2424 # This query yields four overlaps in the database, but one is filtered
2425 # out in postprocessing. The count queries aren't accurate because
2426 # they don't account for duplication that happens due to an internal
2427 # join against commonSkyPix.
2428 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2429 self.assertEqual(
2430 {
2431 DataCoordinate.standardize(
2432 instrument="Cam1",
2433 skymap="SkyMap1",
2434 visit=v,
2435 tract=t,
2436 universe=registry.dimensions,
2437 )
2438 for v, t in [(1, 0), (2, 0), (2, 1)]
2439 },
2440 set(query3),
2441 )
2442 self.assertTrue(query3.any(execute=False, exact=False))
2443 self.assertTrue(query3.any(execute=True, exact=False))
2444 self.assertTrue(query3.any(execute=True, exact=True))
2445 self.assertGreaterEqual(query3.count(exact=False), 4)
2446 self.assertGreaterEqual(query3.count(exact=True), 3)
2447 self.assertFalse(list(query3.explain_no_results()))
2448 # This query yields overlaps in the database, but all are filtered
2449 # out in postprocessing. The count queries again aren't very useful.
2450 # We have to use `where=` here to avoid an optimization that
2451 # (currently) skips the spatial postprocess-filtering because it
2452 # recognizes that no spatial join is necessary. That's not ideal, but
2453 # fixing it is out of scope for this ticket.
2454 query4 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1",
2455 where="visit=1 AND detector=1 AND tract=0 AND patch=4")
2456 self.assertFalse(set(query4))
2457 self.assertTrue(query4.any(execute=False, exact=False))
2458 self.assertTrue(query4.any(execute=True, exact=False))
2459 self.assertFalse(query4.any(execute=True, exact=True))
2460 self.assertGreaterEqual(query4.count(exact=False), 1)
2461 self.assertEqual(query4.count(exact=True), 0)
2462 messages = list(query4.explain_no_results())
2463 self.assertTrue(messages)
2464 self.assertTrue(
2465 any(
2466 "regions did not overlap" in message
2467 for message in messages
2468 )
2469 )