# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import Relation, RelationalAlgebraError, Transfer, iteration, sql

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    SkyPixDimension,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    NoDefaultCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str | dict[str, str]] = None
    """Name or configuration dictionary of the datasets manager class. If a
    subclass provides a value for this member, it overrides the name
    specified in the default configuration (`str` or `dict`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create the RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`. The
        returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters. Subclasses
        that need a purely default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
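
    # A minimal concrete subclass might look like the sketch below.  This is
    # illustrative only: `Registry.createFromConfig` is assumed here as the
    # typical factory used by daf_butler test helpers, and the data directory
    # is hypothetical.
    #
    #     class SqliteMemoryRegistryTests(RegistryTests, unittest.TestCase):
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # an in-memory SQLite repo cannot be shared
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config, create=True)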

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)
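
    # Most tests below start with a freshly made registry and then import one
    # or more of the YAML files under `getDataDir()`, e.g.:
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")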

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to the results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
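
    # For example, a test can verify a lazy data ID query in one call
    # (hypothetical usage mirroring the queries exercised below):
    #
    #     self.checkQueryResults(
    #         registry.queryDataIds(["detector"], instrument="Cam1"),
    #         expected_data_ids,
    #     )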

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but in
        # practice it appears to be much higher.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size: the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with an invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, of which two have the right
        # dataset type and one does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, it
        # should still work, since the CALIBRATION collection is ignored.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
            ),
        )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset IDs."""
        if isinstance(self.datasetsManager, str):
            if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")
        elif isinstance(self.datasetsManager, dict):
            if not self.datasetsManager["cls"].endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
                self.skipTest(f"Unexpected dataset manager {self.datasetsManager['cls']}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        ref = DatasetRef(datasetTypeBias, dataIdBias1, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # The UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All the different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Make a dataset ref with a reproducible dataset ID.
                ref = DatasetRef(datasetTypeBias, dataIdBias1, run=f"run{run}", id_generation_mode=idGenMode)
                (ref1,) = registry._importDatasets([ref])
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)
                self.assertEqual(ref1.id, ref.id)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(
                    datasetTypeBias, dataIdBias1, run=f"run{run+1}", id_generation_mode=idGenMode
                )
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref])
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref])

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data1", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data1" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregisterStorageClass call above isn't simulating the real-life
        # case we want it to simulate, in which different versions of
        # daf_butler in entirely different Python processes interact with the
        # same repo.
        self.assertNotIn("temporary.data1", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual({ref.datasetType for ref in childRefs2}, {childType})
        self.assertEqual({ref.dataId for ref in childRefs2}, set(dataIds))

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # This should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # The chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found in chain2 as
        # well, since run2 is included both directly at the front of chain2
        # and again at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections,
                # while 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # Calling queryDataIds with only one of `datasets` and `collections`
        # is an error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the requested dimensions,
        # but it is a part of the full expression, so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1232 def testSpatialJoin(self):
1233 """Test queries that involve spatial overlap joins."""
1234 registry = self.makeRegistry()
1235 self.loadData(registry, "hsc-rc2-subset.yaml")
1237 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1238 # the TopologicalFamily they belong to. We'll relate all elements in
1239 # each family to all of the elements in each other family.
1240 families = defaultdict(set)
1241 # Dictionary of {element.name: {dataId: region}}.
1242 regions = {}
1243 for element in registry.dimensions.getDatabaseElements():
1244 if element.spatial is not None:
1245 families[element.spatial.name].add(element)
1246 regions[element.name] = {
1247 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1248 }
1250 # If this check fails, it's not necessarily a problem - it may just be
1251 # a reasonable change to the default dimension definitions - but the
1252 # test below depends on there being more than one family to do anything
1253 # useful.
1254 self.assertEqual(len(families), 2)
1256 # Overlap DatabaseDimensionElements with each other.
1257 for family1, family2 in itertools.combinations(families, 2):
1258 for element1, element2 in itertools.product(families[family1], families[family2]):
1259 graph = DimensionGraph.union(element1.graph, element2.graph)
1260 # Construct expected set of overlapping data IDs via a
1261 # brute-force comparison of the regions we've already fetched.
1262 expected = {
1263 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
1264 for (dataId1, region1), (dataId2, region2) in itertools.product(
1265 regions[element1.name].items(), regions[element2.name].items()
1266 )
1267 if not region1.isDisjointFrom(region2)
1268 }
1269 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1270 queried = set(registry.queryDataIds(graph))
1271 self.assertEqual(expected, queried)
1273 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1274 commonSkyPix = registry.dimensions.commonSkyPix
1275 for elementName, regions in regions.items():
1276 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1277 expected = set()
1278 for dataId, region in regions.items():
1279 for begin, end in commonSkyPix.pixelization.envelope(region):
1280 expected.update(
1281 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
1282 for index in range(begin, end)
1283 )
1284 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1285 queried = set(registry.queryDataIds(graph))
1286 self.assertEqual(expected, queried)
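# A short standalone sketch (using only lsst.sphgeom, independent of the
# registry above) of the brute-force predicate this test relies on: a
# region is never disjoint from itself, and an HTM child trixel is never
# disjoint from its parent.
htm6_pix = lsst.sphgeom.HtmPixelization(6)
htm5_pix = lsst.sphgeom.HtmPixelization(5)
child = htm6_pix.triangle(49152)  # first level-6 child of level-5 trixel 12288
parent = htm5_pix.triangle(12288)
self.assertFalse(child.isDisjointFrom(child))
self.assertFalse(child.isDisjointFrom(parent))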
1288 def testAbstractQuery(self):
1289 """Test that we can run a query that just lists the known
1290 bands. This is tricky because band is
1291 backed by a query against physical_filter.
1292 """
1293 registry = self.makeRegistry()
1294 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1295 registry.insertDimensionData(
1296 "physical_filter",
1297 dict(instrument="DummyCam", name="dummy_i", band="i"),
1298 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1299 dict(instrument="DummyCam", name="dummy_r", band="r"),
1300 )
1301 rows = registry.queryDataIds(["band"]).toSet()
1302 self.assertCountEqual(
1303 rows,
1304 [
1305 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1306 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1307 ],
1308 )
1310 def testAttributeManager(self):
1311 """Test basic functionality of attribute manager."""
1312 # Number of attributes with schema versions in a fresh database:
1313 # 6 managers with 2 records per manager, plus one record for the dimensions config.
1314 VERSION_COUNT = 6 * 2 + 1
1316 registry = self.makeRegistry()
1317 attributes = registry._managers.attributes
1319 # check what get() returns for a non-existent key
1320 self.assertIsNone(attributes.get("attr"))
1321 self.assertEqual(attributes.get("attr", ""), "")
1322 self.assertEqual(attributes.get("attr", "Value"), "Value")
1323 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1325 # cannot store empty key or value
1326 with self.assertRaises(ValueError):
1327 attributes.set("", "value")
1328 with self.assertRaises(ValueError):
1329 attributes.set("attr", "")
1331 # set value of a non-existent key
1332 attributes.set("attr", "value")
1333 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1334 self.assertEqual(attributes.get("attr"), "value")
1336 # update value of existing key
1337 with self.assertRaises(ButlerAttributeExistsError):
1338 attributes.set("attr", "value2")
1340 attributes.set("attr", "value2", force=True)
1341 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1342 self.assertEqual(attributes.get("attr"), "value2")
1344 # delete existing key
1345 self.assertTrue(attributes.delete("attr"))
1346 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1348 # delete a non-existent key
1349 self.assertFalse(attributes.delete("non-attr"))
1351 # store a bunch of keys and get the list back
1352 data = [
1353 ("version.core", "1.2.3"),
1354 ("version.dimensions", "3.2.1"),
1355 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1356 ]
1357 for key, value in data:
1358 attributes.set(key, value)
1359 items = dict(attributes.items())
1360 for key, value in data:
1361 self.assertEqual(items[key], value)
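# A small illustrative follow-up (a sketch using only the get/set API
# exercised above): with force=True, set() behaves as an upsert, so
# re-setting an existing key succeeds rather than raising.
attributes.set("config.managers.opaque", "ByNameOpaqueTableStorageManager", force=True)
self.assertEqual(attributes.get("config.managers.opaque"), "ByNameOpaqueTableStorageManager")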
1363 def testQueryDatasetsDeduplication(self):
1364 """Test that the findFirst option to queryDatasets selects datasets
1365 from collections in the order given.
1366 """
1367 registry = self.makeRegistry()
1368 self.loadData(registry, "base.yaml")
1369 self.loadData(registry, "datasets.yaml")
1370 self.assertCountEqual(
1371 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1372 [
1373 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1374 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1375 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1376 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1377 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1378 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1379 ],
1380 )
1381 self.assertCountEqual(
1382 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1383 [
1384 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1385 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1386 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1387 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1388 ],
1389 )
1390 self.assertCountEqual(
1391 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1392 [
1393 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1394 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1395 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1396 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1397 ],
1398 )
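# A pure-Python sketch of the find-first rule exercised above (plain dicts
# stand in for per-collection {detector: dataset} lookups; the helper name
# `_find_first` is illustrative only): for each data ID, the dataset from
# the earliest collection in the search order wins.
def _find_first(search_order, datasets_by_collection):
    result = {}
    for collection in search_order:
        for data_id, dataset in datasets_by_collection[collection].items():
            # setdefault keeps the first match, so later collections cannot
            # override earlier ones.
            result.setdefault(data_id, dataset)
    return result
by_collection = {
    "imported_g": {1: "bias_g_1", 2: "bias_g_2", 3: "bias_g_3"},
    "imported_r": {2: "bias_r_2", 3: "bias_r_3", 4: "bias_r_4"},
}
self.assertEqual(
    _find_first(["imported_g", "imported_r"], by_collection),
    {1: "bias_g_1", 2: "bias_g_2", 3: "bias_g_3", 4: "bias_r_4"},
)
self.assertEqual(
    _find_first(["imported_r", "imported_g"], by_collection),
    {1: "bias_g_1", 2: "bias_r_2", 3: "bias_r_3", 4: "bias_r_4"},
)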
1400 def testQueryResults(self):
1401 """Test querying for data IDs and then manipulating the QueryResults
1402 object returned to perform other queries.
1403 """
1404 registry = self.makeRegistry()
1405 self.loadData(registry, "base.yaml")
1406 self.loadData(registry, "datasets.yaml")
1407 bias = registry.getDatasetType("bias")
1408 flat = registry.getDatasetType("flat")
1409 # Obtain expected results from methods other than those we're testing
1410 # here. That includes:
1411 # - the dimensions of the data IDs we want to query:
1412 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1413 # - the dimensions of some other data IDs we'll extract from that:
1414 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1415 # - the data IDs we expect to obtain from the first queries:
1416 expectedDataIds = DataCoordinateSet(
1417 {
1418 DataCoordinate.standardize(
1419 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1420 )
1421 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1422 },
1423 graph=expectedGraph,
1424 hasFull=False,
1425 hasRecords=False,
1426 )
1427 # - the flat datasets we expect to find from those data IDs, in just
1428 # one collection (so deduplication is irrelevant):
1429 expectedFlats = [
1430 registry.findDataset(
1431 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1432 ),
1433 registry.findDataset(
1434 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1435 ),
1436 registry.findDataset(
1437 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1438 ),
1439 ]
1440 # - the data IDs we expect to extract from that:
1441 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1442 # - the bias datasets we expect to find from those data IDs, after we
1443 # subset out the physical_filter dimension, both with duplicates:
1444 expectedAllBiases = [
1445 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1446 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1447 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1448 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1449 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1450 ]
1451 # - ...and without duplicates:
1452 expectedDeduplicatedBiases = [
1453 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1454 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1455 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1456 ]
1457 # Test against those expected results, using a "lazy" query for the
1458 # data IDs (which re-executes that query each time we use it to do
1459 # something new).
1460 dataIds = registry.queryDataIds(
1461 ["detector", "physical_filter"],
1462 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1463 instrument="Cam1",
1464 )
1465 self.assertEqual(dataIds.graph, expectedGraph)
1466 self.assertEqual(dataIds.toSet(), expectedDataIds)
1467 self.assertCountEqual(
1468 list(
1469 dataIds.findDatasets(
1470 flat,
1471 collections=["imported_r"],
1472 )
1473 ),
1474 expectedFlats,
1475 )
1476 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1477 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1478 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1479 self.assertCountEqual(
1480 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1481 expectedAllBiases,
1482 )
1483 self.assertCountEqual(
1484 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1485 expectedDeduplicatedBiases,
1486 )
1488 # Check dimensions match.
1489 with self.assertRaises(ValueError):
1490 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True)
1492 # Use a component dataset type.
1493 self.assertCountEqual(
1494 [
1495 ref.makeComponentRef("image")
1496 for ref in subsetDataIds.findDatasets(
1497 bias,
1498 collections=["imported_r", "imported_g"],
1499 findFirst=False,
1500 )
1501 ],
1502 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1503 )
1505 # Use a named dataset type that does not exist and a dataset type
1506 # object that does not exist.
1507 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1509 # Test both string name and dataset type object.
1510 test_type: Union[str, DatasetType]
1511 for test_type, test_type_name in (
1512 (unknown_type, unknown_type.name),
1513 (unknown_type.name, unknown_type.name),
1514 ):
1515 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1516 list(
1517 subsetDataIds.findDatasets(
1518 test_type, collections=["imported_r", "imported_g"], findFirst=True
1519 )
1520 )
1522 # Materialize the bias dataset queries (only) by putting the results
1523 # into temporary tables, then repeat those tests.
1524 with subsetDataIds.findDatasets(
1525 bias, collections=["imported_r", "imported_g"], findFirst=False
1526 ).materialize() as biases:
1527 self.assertCountEqual(list(biases), expectedAllBiases)
1528 with subsetDataIds.findDatasets(
1529 bias, collections=["imported_r", "imported_g"], findFirst=True
1530 ).materialize() as biases:
1531 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1532 # Materialize the data ID subset query, but not the dataset queries.
1533 with subsetDataIds.materialize() as subsetDataIds:
1534 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1535 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1536 self.assertCountEqual(
1537 list(
1538 subsetDataIds.findDatasets(
1539 bias, collections=["imported_r", "imported_g"], findFirst=False
1540 )
1541 ),
1542 expectedAllBiases,
1543 )
1544 self.assertCountEqual(
1545 list(
1546 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1547 ),
1548 expectedDeduplicatedBiases,
1549 )
1550 # Materialize the dataset queries, too.
1551 with subsetDataIds.findDatasets(
1552 bias, collections=["imported_r", "imported_g"], findFirst=False
1553 ).materialize() as biases:
1554 self.assertCountEqual(list(biases), expectedAllBiases)
1555 with subsetDataIds.findDatasets(
1556 bias, collections=["imported_r", "imported_g"], findFirst=True
1557 ).materialize() as biases:
1558 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1559 # Materialize the original query, but none of the follow-up queries.
1560 with dataIds.materialize() as dataIds:
1561 self.assertEqual(dataIds.graph, expectedGraph)
1562 self.assertEqual(dataIds.toSet(), expectedDataIds)
1563 self.assertCountEqual(
1564 list(
1565 dataIds.findDatasets(
1566 flat,
1567 collections=["imported_r"],
1568 )
1569 ),
1570 expectedFlats,
1571 )
1572 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1573 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1574 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1575 self.assertCountEqual(
1576 list(
1577 subsetDataIds.findDatasets(
1578 bias, collections=["imported_r", "imported_g"], findFirst=False
1579 )
1580 ),
1581 expectedAllBiases,
1582 )
1583 self.assertCountEqual(
1584 list(
1585 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1586 ),
1587 expectedDeduplicatedBiases,
1588 )
1589 # Materialize just the bias dataset queries.
1590 with subsetDataIds.findDatasets(
1591 bias, collections=["imported_r", "imported_g"], findFirst=False
1592 ).materialize() as biases:
1593 self.assertCountEqual(list(biases), expectedAllBiases)
1594 with subsetDataIds.findDatasets(
1595 bias, collections=["imported_r", "imported_g"], findFirst=True
1596 ).materialize() as biases:
1597 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1598 # Materialize the subset data ID query, but not the dataset
1599 # queries.
1600 with subsetDataIds.materialize() as subsetDataIds:
1601 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1602 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1603 self.assertCountEqual(
1604 list(
1605 subsetDataIds.findDatasets(
1606 bias, collections=["imported_r", "imported_g"], findFirst=False
1607 )
1608 ),
1609 expectedAllBiases,
1610 )
1611 self.assertCountEqual(
1612 list(
1613 subsetDataIds.findDatasets(
1614 bias, collections=["imported_r", "imported_g"], findFirst=True
1615 )
1616 ),
1617 expectedDeduplicatedBiases,
1618 )
1619 # Materialize the bias dataset queries, too, so now we're
1620 # materializing every single step.
1621 with subsetDataIds.findDatasets(
1622 bias, collections=["imported_r", "imported_g"], findFirst=False
1623 ).materialize() as biases:
1624 self.assertCountEqual(list(biases), expectedAllBiases)
1625 with subsetDataIds.findDatasets(
1626 bias, collections=["imported_r", "imported_g"], findFirst=True
1627 ).materialize() as biases:
1628 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1630 def testStorageClassPropagation(self):
1631 """Test that queries for datasets respect the storage class passed in
1632 as part of a full dataset type.
1633 """
1634 registry = self.makeRegistry()
1635 self.loadData(registry, "base.yaml")
1636 dataset_type_in_registry = DatasetType(
1637 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions
1638 )
1639 registry.registerDatasetType(dataset_type_in_registry)
1640 run = "run1"
1641 registry.registerRun(run)
1642 (inserted_ref,) = registry.insertDatasets(
1643 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1644 )
1645 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1646 query_dataset_type = DatasetType(
1647 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions
1648 )
1649 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1650 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1651 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1652 (query_datasets_ref,) = query_datasets_result
1653 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1654 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1655 query_dataset_type, collections=[run]
1656 )
1657 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1658 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1659 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1660 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1661 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1662 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1663 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
1665 def testEmptyDimensionsQueries(self):
1666 """Test Query and QueryResults objects in the case where there are no
1667 dimensions.
1668 """
1669 # Set up test data: one dataset type, two runs, one dataset in each.
1670 registry = self.makeRegistry()
1671 self.loadData(registry, "base.yaml")
1672 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1673 registry.registerDatasetType(schema)
1674 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1675 run1 = "run1"
1676 run2 = "run2"
1677 registry.registerRun(run1)
1678 registry.registerRun(run2)
1679 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1680 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1681 # Query directly for both of the datasets, and each one, one at a time.
1682 self.checkQueryResults(
1683 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1684 )
1685 self.checkQueryResults(
1686 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1687 [dataset1],
1688 )
1689 self.checkQueryResults(
1690 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1691 [dataset2],
1692 )
1693 # Query for data IDs with no dimensions.
1694 dataIds = registry.queryDataIds([])
1695 self.checkQueryResults(dataIds, [dataId])
1696 # Use queried data IDs to find the datasets.
1697 self.checkQueryResults(
1698 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1699 [dataset1, dataset2],
1700 )
1701 self.checkQueryResults(
1702 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1703 [dataset1],
1704 )
1705 self.checkQueryResults(
1706 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1707 [dataset2],
1708 )
1709 # Now materialize the data ID query results and repeat those tests.
1710 with dataIds.materialize() as dataIds:
1711 self.checkQueryResults(dataIds, [dataId])
1712 self.checkQueryResults(
1713 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1714 [dataset1],
1715 )
1716 self.checkQueryResults(
1717 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1718 [dataset2],
1719 )
1720 # Query for non-empty data IDs, then subset that to get the empty one.
1721 # Repeat the above tests starting from that.
1722 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1723 self.checkQueryResults(dataIds, [dataId])
1724 self.checkQueryResults(
1725 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1726 [dataset1, dataset2],
1727 )
1728 self.checkQueryResults(
1729 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1730 [dataset1],
1731 )
1732 self.checkQueryResults(
1733 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1734 [dataset2],
1735 )
1736 with dataIds.materialize() as dataIds:
1737 self.checkQueryResults(dataIds, [dataId])
1738 self.checkQueryResults(
1739 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1740 [dataset1, dataset2],
1741 )
1742 self.checkQueryResults(
1743 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1744 [dataset1],
1745 )
1746 self.checkQueryResults(
1747 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1748 [dataset2],
1749 )
1750 # Query for non-empty data IDs, then materialize, then subset to get
1751 # the empty one. Repeat again.
1752 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1753 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1754 self.checkQueryResults(dataIds, [dataId])
1755 self.checkQueryResults(
1756 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1757 [dataset1, dataset2],
1758 )
1759 self.checkQueryResults(
1760 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1761 [dataset1],
1762 )
1763 self.checkQueryResults(
1764 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1765 [dataset2],
1766 )
1767 with dataIds.materialize() as dataIds:
1768 self.checkQueryResults(dataIds, [dataId])
1769 self.checkQueryResults(
1770 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1771 [dataset1, dataset2],
1772 )
1773 self.checkQueryResults(
1774 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1775 [dataset1],
1776 )
1777 self.checkQueryResults(
1778 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1779 [dataset2],
1780 )
1781 # Query for non-empty data IDs with a constraint on an empty-data-ID
1782 # dataset that exists.
1783 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1784 self.checkQueryResults(
1785 dataIds.subset(unique=True),
1786 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1787 )
1788 # Again query for non-empty data IDs with a constraint on empty-data-ID
1789 # datasets, but when the datasets don't exist. We delete the existing
1790 # dataset and query just that collection rather than creating a new
1791 # empty collection because this is a bit less likely for our build-time
1792 # logic to shortcut out (via the collection summaries), and such a
1793 # shortcut would make this test a bit more trivial than we'd like.
1794 registry.removeDatasets([dataset2])
1795 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1796 self.checkQueryResults(dataIds, [])
1798 def testDimensionDataModifications(self):
1799 """Test that modifying dimension records via:
1800 syncDimensionData(..., update=True) and
1801 insertDimensionData(..., replace=True) works as expected, even in the
1802 presence of datasets using those dimensions and spatial overlap
1803 relationships.
1804 """
1806 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1807 """Unpack a sphgeom.RangeSet into the integers it contains."""
1808 for begin, end in ranges:
1809 yield from range(begin, end)
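# For example (a quick sanity sketch, not used below): RangeSet(3).scaled(4)
# is the half-open index range [12, 16), so unpacking yields 12, 13, 14, 15.
assert list(unpack_range_set(lsst.sphgeom.RangeSet(3).scaled(4))) == [12, 13, 14, 15]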
1811 def range_set_hull(
1812 ranges: lsst.sphgeom.RangeSet,
1813 pixelization: lsst.sphgeom.HtmPixelization,
1814 ) -> lsst.sphgeom.ConvexPolygon:
1815 """Create a ConvexPolygon hull of the region defined by a set of
1816 HTM pixelization index ranges.
1817 """
1818 points = []
1819 for index in unpack_range_set(ranges):
1820 points.extend(pixelization.triangle(index).getVertices())
1821 return lsst.sphgeom.ConvexPolygon(points)
1823 # Use HTM to set up an initial parent region (one arbitrary trixel)
1824 # and four child regions (the trixels within the parent at the next
1825 # level). We'll use the parent as a tract/visit region and the children
1826 # as its patch/visit_detector regions.
1827 registry = self.makeRegistry()
1828 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1829 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1830 index = 12288
1831 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1832 assert htm6.universe().contains(child_ranges_small)
1833 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1834 parent_region_small = lsst.sphgeom.ConvexPolygon(
1835 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1836 )
1837 assert all(parent_region_small.contains(c) for c in child_regions_small)
1838 # Make a larger version of each child region, defined to be the set of
1839 # htm6 trixels that overlap the original's bounding circle. Make a new
1840 # parent that's the convex hull of the new children.
1841 child_regions_large = [
1842 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1843 ]
1844 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1845 parent_region_large = lsst.sphgeom.ConvexPolygon(
1846 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1847 )
1848 assert all(parent_region_large.contains(c) for c in child_regions_large)
1849 assert parent_region_large.contains(parent_region_small)
1850 assert not parent_region_small.contains(parent_region_large)
1851 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1852 # Find some commonSkyPix indices that overlap the large regions but do
1853 # not overlap the small regions. We use commonSkyPix here to make sure the
1854 # real tests later involve what's in the database, not just post-query
1855 # filtering of regions.
1856 child_difference_indices = []
1857 for large, small in zip(child_regions_large, child_regions_small):
1858 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1859 assert difference, "if this is empty, we can't test anything useful with these regions"
1860 assert all(
1861 not commonSkyPix.triangle(d).isDisjointFrom(large)
1862 and commonSkyPix.triangle(d).isDisjointFrom(small)
1863 for d in difference
1864 )
1865 child_difference_indices.append(difference)
1866 parent_difference_indices = list(
1867 unpack_range_set(
1868 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1869 )
1870 )
1871 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1872 assert all(
1873 (
1874 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1875 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1876 )
1877 for d in parent_difference_indices
1878 )
1879 # Now that we've finally got those regions, we'll insert the large ones
1880 # as tract/patch dimension records.
1881 skymap_name = "testing_v1"
1882 registry.insertDimensionData(
1883 "skymap",
1884 {
1885 "name": skymap_name,
1886 "hash": bytes([42]),
1887 "tract_max": 1,
1888 "patch_nx_max": 2,
1889 "patch_ny_max": 2,
1890 },
1891 )
1892 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1893 registry.insertDimensionData(
1894 "patch",
1895 *[
1896 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1897 for n, c in enumerate(child_regions_large)
1898 ],
1899 )
1900 # Add a dataset that uses these dimensions to make sure that modifying
1901 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1902 # implement insert with replace=True as delete-then-insert).
1903 dataset_type = DatasetType(
1904 "coadd",
1905 dimensions=["tract", "patch"],
1906 universe=registry.dimensions,
1907 storageClass="Exposure",
1908 )
1909 registry.registerDatasetType(dataset_type)
1910 registry.registerCollection("the_run", CollectionType.RUN)
1911 registry.insertDatasets(
1912 dataset_type,
1913 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1914 run="the_run",
1915 )
1916 # Query for tracts and patches that overlap some "difference"
1917 # commonSkyPix pixels; there should be overlaps, because the database
1918 # holds the "large" suite of regions.
1919 self.assertEqual(
1920 {0},
1921 {
1922 data_id["tract"]
1923 for data_id in registry.queryDataIds(
1924 ["tract"],
1925 skymap=skymap_name,
1926 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1927 )
1928 },
1929 )
1930 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1931 self.assertIn(
1932 patch_id,
1933 {
1934 data_id["patch"]
1935 for data_id in registry.queryDataIds(
1936 ["patch"],
1937 skymap=skymap_name,
1938 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1939 )
1940 },
1941 )
1942 # Use sync to update the tract region and insert to update the regions
1943 # of the patches, to the "small" suite.
1944 updated = registry.syncDimensionData(
1945 "tract",
1946 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1947 update=True,
1948 )
1949 self.assertEqual(updated, {"region": parent_region_large})
1950 registry.insertDimensionData(
1951 "patch",
1952 *[
1953 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1954 for n, c in enumerate(child_regions_small)
1955 ],
1956 replace=True,
1957 )
1958 # Query again; there now should be no such overlaps, because the
1959 # database has the "small" suite of regions.
1960 self.assertFalse(
1961 set(
1962 registry.queryDataIds(
1963 ["tract"],
1964 skymap=skymap_name,
1965 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1966 )
1967 )
1968 )
1969 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1970 self.assertNotIn(
1971 patch_id,
1972 {
1973 data_id["patch"]
1974 for data_id in registry.queryDataIds(
1975 ["patch"],
1976 skymap=skymap_name,
1977 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1978 )
1979 },
1980 )
1981 # Update back to the large regions and query one more time.
1982 updated = registry.syncDimensionData(
1983 "tract",
1984 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1985 update=True,
1986 )
1987 self.assertEqual(updated, {"region": parent_region_small})
1988 registry.insertDimensionData(
1989 "patch",
1990 *[
1991 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1992 for n, c in enumerate(child_regions_large)
1993 ],
1994 replace=True,
1995 )
1996 self.assertEqual(
1997 {0},
1998 {
1999 data_id["tract"]
2000 for data_id in registry.queryDataIds(
2001 ["tract"],
2002 skymap=skymap_name,
2003 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2004 )
2005 },
2006 )
2007 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2008 self.assertIn(
2009 patch_id,
2010 {
2011 data_id["patch"]
2012 for data_id in registry.queryDataIds(
2013 ["patch"],
2014 skymap=skymap_name,
2015 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2016 )
2017 },
2018 )
2020 def testCalibrationCollections(self):
2021 """Test operations on `~CollectionType.CALIBRATION` collections,
2022 including `Registry.certify`, `Registry.decertify`, and
2023 `Registry.findDataset`.
2024 """
2025 # Setup - make a Registry, fill it with some datasets in
2026 # non-calibration collections.
2027 registry = self.makeRegistry()
2028 self.loadData(registry, "base.yaml")
2029 self.loadData(registry, "datasets.yaml")
2030 # Set up some timestamps.
2031 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2032 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2033 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2034 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2035 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2036 allTimespans = [
2037 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2038 ]
2039 # Get references to some datasets.
2040 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2041 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2042 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2043 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2044 # Register the main calibration collection we'll be working with.
2045 collection = "Cam1/calibs/default"
2046 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2047 # Cannot associate into a calibration collection (no timespan).
2048 with self.assertRaises(CollectionTypeError):
2049 registry.associate(collection, [bias2a])
2050 # Certify 2a dataset with [t2, t4) validity.
2051 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2052 # Test that we can query for this dataset via the new collection, both
2053 # on its own and with a RUN collection, as long as we don't try to join
2054 # in temporal dimensions or use findFirst=True.
2055 self.assertEqual(
2056 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2057 {bias2a},
2058 )
2059 self.assertEqual(
2060 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2061 {
2062 bias2a,
2063 bias2b,
2064 bias3b,
2065 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2066 },
2067 )
2068 self.assertEqual(
2069 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2070 {registry.expandDataId(instrument="Cam1", detector=2)},
2071 )
2072 self.assertEqual(
2073 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2074 {
2075 registry.expandDataId(instrument="Cam1", detector=2),
2076 registry.expandDataId(instrument="Cam1", detector=3),
2077 registry.expandDataId(instrument="Cam1", detector=4),
2078 },
2079 )
2081 # We should not be able to certify 2b with anything overlapping that
2082 # window.
2083 with self.assertRaises(ConflictingDefinitionError):
2084 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2085 with self.assertRaises(ConflictingDefinitionError):
2086 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2087 with self.assertRaises(ConflictingDefinitionError):
2088 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2089 with self.assertRaises(ConflictingDefinitionError):
2090 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2091 with self.assertRaises(ConflictingDefinitionError):
2092 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2093 with self.assertRaises(ConflictingDefinitionError):
2094 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2095 with self.assertRaises(ConflictingDefinitionError):
2096 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2097 with self.assertRaises(ConflictingDefinitionError):
2098 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2099 # We should be able to certify 3a with a range overlapping that window,
2100 # because it's for a different detector.
2101 # We'll certify 3a over [t1, t3).
2102 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2103 # Now we'll certify 2b and 3b together over [t4, ∞).
2104 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2106 # Fetch all associations and check that they are what we expect.
2107 self.assertCountEqual(
2108 list(
2109 registry.queryDatasetAssociations(
2110 "bias",
2111 collections=[collection, "imported_g", "imported_r"],
2112 )
2113 ),
2114 [
2115 DatasetAssociation(
2116 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2117 collection="imported_g",
2118 timespan=None,
2119 ),
2120 DatasetAssociation(
2121 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2122 collection="imported_r",
2123 timespan=None,
2124 ),
2125 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2126 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2127 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2128 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2129 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2130 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2131 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2132 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2133 ],
2134 )
2136 class Ambiguous:
2137 """Tag class to denote lookups that should be ambiguous."""
2139 pass
2141 def assertLookup(
2142 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2143 ) -> None:
2144 """Local function that asserts that a bias lookup returns the given
2145 expected result.
2146 """
2147 if expected is Ambiguous:
2148 with self.assertRaises((DatasetTypeError, LookupError)):
2149 registry.findDataset(
2150 "bias",
2151 collections=collection,
2152 instrument="Cam1",
2153 detector=detector,
2154 timespan=timespan,
2155 )
2156 else:
2157 self.assertEqual(
2158 expected,
2159 registry.findDataset(
2160 "bias",
2161 collections=collection,
2162 instrument="Cam1",
2163 detector=detector,
2164 timespan=timespan,
2165 ),
2166 )
2168 # Systematically test lookups against expected results.
2169 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2170 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2171 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2172 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2173 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2174 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2175 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2176 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2177 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2178 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2179 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2180 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2181 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2182 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2183 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2184 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2185 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2186 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2187 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2188 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2189 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2190 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2191 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2192 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2193 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2194 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2195 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2196 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2197 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2198 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2199 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2200 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2201 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2202 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2203 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2204 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2205 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2206 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2207 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2208 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2209 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2210 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2212 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2213 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2214 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2215 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2216 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2217 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2218 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2219 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2220 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2221 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2222 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2223 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2224 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2225 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2226 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2227 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2228 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2229 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2230 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2231 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2232 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2233 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2234 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2235 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2236 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2237 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2238 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2239 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2240 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2241 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2242 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2243 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2244 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2245 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2246 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2247 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2248 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2249 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2250 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2251 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2252 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2253 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2254 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2255 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2256 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2257 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2259 # Decertify everything, this time with explicit data IDs, then check
2260 # that no lookups succeed.
2261 registry.decertify(
2262 collection,
2263 "bias",
2264 Timespan(None, None),
2265 dataIds=[
2266 dict(instrument="Cam1", detector=2),
2267 dict(instrument="Cam1", detector=3),
2268 ],
2269 )
2270 for detector in (2, 3):
2271 for timespan in allTimespans:
2272 assertLookup(detector=detector, timespan=timespan, expected=None)
2273 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2274 # those.
2275 registry.certify(
2276 collection,
2277 [bias2a, bias3a],
2278 Timespan(None, None),
2279 )
2280 for timespan in allTimespans:
2281 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2282 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2283 # Decertify just bias2a over [t2, t4).
2284 # This should split a single certification row into two (and leave the
2285 # other existing row, for bias3a, alone).
2286 registry.decertify(
2287 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2288 )
2289 for timespan in allTimespans:
2290 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2291 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2292 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2293 if overlapsBefore and overlapsAfter:
2294 expected = Ambiguous
2295 elif overlapsBefore or overlapsAfter:
2296 expected = bias2a
2297 else:
2298 expected = None
2299 assertLookup(detector=2, timespan=timespan, expected=expected)
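# A pure-Python sketch of the row-splitting just verified (tuples of
# (begin, end) with None meaning unbounded; `_decertify_span` is a
# hypothetical helper, not a Registry API, and it assumes the cut
# actually overlaps the row, since decertify only rewrites overlapping rows).
def _decertify_span(row, cut):
    row_begin, row_end = row
    cut_begin, cut_end = cut
    pieces = []
    # Keep any validity before the cut...
    if row_begin is None or (cut_begin is not None and row_begin < cut_begin):
        pieces.append((row_begin, cut_begin))
    # ...and any validity after the cut.
    if row_end is None or (cut_end is not None and cut_end < row_end):
        pieces.append((cut_end, row_end))
    return pieces
# Cutting [t2, t4) out of an unbounded validity range leaves two rows
# (2 and 4 stand in for t2 and t4).
self.assertEqual(_decertify_span((None, None), (2, 4)), [(None, 2), (4, None)])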
2301 def testSkipCalibs(self):
2302 """Test how queries handle skipping of calibration collections."""
2303 registry = self.makeRegistry()
2304 self.loadData(registry, "base.yaml")
2305 self.loadData(registry, "datasets.yaml")
2307 coll_calib = "Cam1/calibs/default"
2308 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2310 # Add all biases to the calibration collection.
2311 # Without this, the logic that prunes dataset subqueries based on
2312 # datasetType-collection summary information will fire before the logic
2313 # we want to test below. This is a good thing (it avoids the dreaded
2314 # NotImplementedError a bit more often) everywhere but here.
2315 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2317 coll_list = [coll_calib, "imported_g", "imported_r"]
2318 chain = "Cam1/chain"
2319 registry.registerCollection(chain, type=CollectionType.CHAINED)
2320 registry.setCollectionChain(chain, coll_list)
2322 # explicit list will raise if findFirst=True or there are temporal
2323 # dimensions
2324 with self.assertRaises(NotImplementedError):
2325 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2326 with self.assertRaises(NotImplementedError):
2327 registry.queryDataIds(
2328 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2329 ).count()
2331 # chain will skip
2332 datasets = list(registry.queryDatasets("bias", collections=chain))
2333 self.assertGreater(len(datasets), 0)
2335 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2336 self.assertGreater(len(dataIds), 0)
2338 # glob will skip too
2339 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2340 self.assertGreater(len(datasets), 0)
2342 # regular expression will skip too
2343 pattern = re.compile(".*")
2344 datasets = list(registry.queryDatasets("bias", collections=pattern))
2345 self.assertGreater(len(datasets), 0)
2347 # ellipsis should work as usual
2348 datasets = list(registry.queryDatasets("bias", collections=...))
2349 self.assertGreater(len(datasets), 0)
2351 # a few tests with findFirst
2352 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2353 self.assertGreater(len(datasets), 0)
2355 def testIngestTimeQuery(self):
2356 registry = self.makeRegistry()
2357 self.loadData(registry, "base.yaml")
2358 dt0 = datetime.utcnow()
2359 self.loadData(registry, "datasets.yaml")
2360 dt1 = datetime.utcnow()
2362 datasets = list(registry.queryDatasets(..., collections=...))
2363 len0 = len(datasets)
2364 self.assertGreater(len0, 0)
2366 where = "ingest_date > T'2000-01-01'"
2367 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2368 len1 = len(datasets)
2369 self.assertEqual(len0, len1)
2371 # no one will ever use this piece of software in 30 years
2372 where = "ingest_date > T'2050-01-01'"
2373 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2374 len2 = len(datasets)
2375 self.assertEqual(len2, 0)
2377 # Check more exact timing to make sure there is no 37-second offset
2378 # (after fixing DM-30124). SQLite time precision is 1 second; make
2379 # sure that we don't test with higher precision.
2380 tests = [
2381 # format: (timestamp, operator, expected_len)
2382 (dt0 - timedelta(seconds=1), ">", len0),
2383 (dt0 - timedelta(seconds=1), "<", 0),
2384 (dt1 + timedelta(seconds=1), "<", len0),
2385 (dt1 + timedelta(seconds=1), ">", 0),
2386 ]
2387 for dt, op, expect_len in tests:
2388 dt_str = dt.isoformat(sep=" ")
2390 where = f"ingest_date {op} T'{dt_str}'"
2391 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2392 self.assertEqual(len(datasets), expect_len)
2394 # same with bind using datetime or astropy Time
2395 where = f"ingest_date {op} ingest_time"
2396 datasets = list(
2397 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2398 )
2399 self.assertEqual(len(datasets), expect_len)
2401 dt_astropy = astropy.time.Time(dt, format="datetime")
2402 datasets = list(
2403 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2404 )
2405 self.assertEqual(len(datasets), expect_len)
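# A side note as a sketch: the time-literal round trip used above relies on
# datetime.isoformat(sep=" ") producing the form the T'...' literal accepts.
self.assertEqual(datetime(2020, 1, 1, 12, 0, 0).isoformat(sep=" "), "2020-01-01 12:00:00")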
2407 def testTimespanQueries(self):
2408 """Test query expressions involving timespans."""
2409 registry = self.makeRegistry()
2410 self.loadData(registry, "hsc-rc2-subset.yaml")
2411 # All visits in the database; mapping from ID to timespan.
2412 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2413 # Just those IDs, sorted (which is also temporal sorting, because HSC
2414 # visit IDs are monotonically increasing).
2415 ids = sorted(visits.keys())
2416 self.assertGreater(len(ids), 20)
2417 # Pick some quasi-random indexes into `ids` to play with.
2418 i1 = int(len(ids) * 0.1)
2419 i2 = int(len(ids) * 0.3)
2420 i3 = int(len(ids) * 0.6)
2421 i4 = int(len(ids) * 0.8)
2422 # Extract some times from those: just before the beginning of i1 (which
2423 # should be after the end of the previous visit), exactly the
2424 # beginning of i2, just after the beginning of i3 (and before its end),
2425 # and the exact end of i4.
2426 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2427 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2428 t2 = visits[ids[i2]].begin
2429 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2430 self.assertLess(t3, visits[ids[i3]].end)
2431 t4 = visits[ids[i4]].end
2432 # Make sure those are actually in order.
2433 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2435 bind = {
2436 "t1": t1,
2437 "t2": t2,
2438 "t3": t3,
2439 "t4": t4,
2440 "ts23": Timespan(t2, t3),
2441 }
2443 def query(where):
2444 """Helper function that queries for visit data IDs and returns
2445 results as a sorted, deduplicated list of visit IDs.
2446 """
2447 return sorted(
2448 {
2449 dataId["visit"]
2450 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2451 }
2452 )
2454 # Try a bunch of timespan queries, mixing up the bounds themselves,
2455 # where they appear in the expression, and how we get the timespan into
2456 # the expression.
2458 # t1 is before the start of i1, so this should not include i1.
2459 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2460 # t2 is exactly at the start of i2, but ends are exclusive, so these
2461 # should not include i2.
2462 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2463 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2464 # t3 is in the middle of i3, so this should include i3.
2465 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2466 # This one should not include i3, by the same reasoning.
2467 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2468 # t4 is exactly at the end of i4, so this should include i4.
2469 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2470 # i4's upper bound of t4 is exclusive, so this should not include i4.
2471 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2473 # Now some timespan vs. time scalar queries.
2474 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2475 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2476 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2477 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2478 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2479 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2481 # Empty timespans should not overlap anything.
2482 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
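# A minimal pure-Python sketch of the half-open semantics asserted above
# (inclusive begin, exclusive end, None meaning unbounded); `_overlaps` is
# an illustrative stand-in, not the Timespan API.
def _overlaps(a, b):
    def empty(span):
        begin, end = span
        return begin is not None and end is not None and end <= begin
    # Empty timespans overlap nothing, including themselves.
    if empty(a) or empty(b):
        return False
    (a0, a1), (b0, b1) = a, b
    return (b1 is None or a0 is None or a0 < b1) and (a1 is None or b0 is None or b0 < a1)
self.assertFalse(_overlaps((1, 2), (2, 3)))  # shared endpoint: no overlap
self.assertTrue(_overlaps((1, 3), (2, 4)))
self.assertFalse(_overlaps((3, 2), (1, 4)))  # empty timespan overlaps nothing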
2484 def testCollectionSummaries(self):
2485 """Test recording and retrieval of collection summaries."""
2486 self.maxDiff = None
2487 registry = self.makeRegistry()
2488 # Importing datasets from yaml should go through the code path where
2489 # we update collection summaries as we insert datasets.
2490 self.loadData(registry, "base.yaml")
2491 self.loadData(registry, "datasets.yaml")
2492 flat = registry.getDatasetType("flat")
2493 expected1 = CollectionSummary()
2494 expected1.dataset_types.add(registry.getDatasetType("bias"))
2495 expected1.add_data_ids(
2496 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2497 )
2498 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2499 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2500 # Create a chained collection with both of the imported runs; the
2501 # summary should be the same, because it's a union with itself.
2502 chain = "chain"
2503 registry.registerCollection(chain, CollectionType.CHAINED)
2504 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2505 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2506 # Associate flats only into a tagged collection and a calibration
2507 # collection to check summaries of those.
2508 tag = "tag"
2509 registry.registerCollection(tag, CollectionType.TAGGED)
2510 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2511 calibs = "calibs"
2512 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2513 registry.certify(
2514 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2515 )
2516 expected2 = expected1.copy()
2517 expected2.dataset_types.discard("bias")
2518 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2519 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2520 # Explicitly calling Registry.refresh() should load those same
2521 # summaries, via a totally different code path.
2522 registry.refresh()
2523 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2524 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2525 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2526 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2528 def testBindInQueryDatasets(self):
2529 """Test that the bind parameter is correctly forwarded in
2530 queryDatasets recursion.
2531 """
2532 registry = self.makeRegistry()
2533 # Load the standard test datasets to query against.
2535 self.loadData(registry, "base.yaml")
2536 self.loadData(registry, "datasets.yaml")
2537 self.assertEqual(
2538 set(registry.queryDatasets("flat", band="r", collections=...)),
2539 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2540 )
2542 def testQueryIntRangeExpressions(self):
2543 """Test integer range expressions in ``where`` arguments.
2545 Note that our expressions use inclusive stop values, unlike Python's.
2546 """
2547 registry = self.makeRegistry()
2548 self.loadData(registry, "base.yaml")
2549 self.assertEqual(
2550 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2551 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2552 )
2553 self.assertEqual(
2554 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2555 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2556 )
2557 self.assertEqual(
2558 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2559 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2560 )
2562 def testQueryResultSummaries(self):
2563 """Test summary methods like `count`, `any`, and `explain_no_results`
2564 on `DataCoordinateQueryResults` and `DatasetQueryResults`
2565 """
2566 registry = self.makeRegistry()
2567 self.loadData(registry, "base.yaml")
2568 self.loadData(registry, "datasets.yaml")
2569 self.loadData(registry, "spatial.yaml")
2570 # Default test dataset has two collections, each with both flats and
2571 # biases. Add a new collection with only biases.
2572 registry.registerCollection("biases", CollectionType.TAGGED)
2573 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2574 # First query yields two results, and involves no postprocessing.
2575 query1 = registry.queryDataIds(["physical_filter"], band="r")
2576 self.assertTrue(query1.any(execute=False, exact=False))
2577 self.assertTrue(query1.any(execute=True, exact=False))
2578 self.assertTrue(query1.any(execute=True, exact=True))
2579 self.assertEqual(query1.count(exact=False), 2)
2580 self.assertEqual(query1.count(exact=True), 2)
2581 self.assertFalse(list(query1.explain_no_results()))
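# A rough reading of the flags used here: execute=False answers from
# the query structure alone without running SQL, exact=False permits
# an approximate answer, and exact=True requires actually executing
# the query (including postprocessing) for a definitive result.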
2582 # Second query should yield no results, which we should see when
2583 # we attempt to expand the data ID.
2584 query2 = registry.queryDataIds(["physical_filter"], band="h")
2585 # There's no execute=False, exact=False test here because the behavior
2586 # is not something we want to guarantee in this case (and exact=False
2587 # says either answer is legal).
2588 self.assertFalse(query2.any(execute=True, exact=False))
2589 self.assertFalse(query2.any(execute=True, exact=True))
2590 self.assertEqual(query2.count(exact=False), 0)
2591 self.assertEqual(query2.count(exact=True), 0)
2592 self.assertTrue(list(query2.explain_no_results()))
2593 # These queries yield no results due to various problems that can be
2594 # spotted prior to execution, yielding helpful diagnostics.
2595 base_query = registry.queryDataIds(["detector", "physical_filter"])
2596 queries_and_snippets = [
2597 (
2598 # Dataset type name doesn't match any existing dataset types.
2599 registry.queryDatasets("nonexistent", collections=...),
2600 ["nonexistent"],
2601 ),
2602 (
2603 # Dataset type object isn't registered.
2604 registry.queryDatasets(
2605 DatasetType(
2606 "nonexistent",
2607 dimensions=["instrument"],
2608 universe=registry.dimensions,
2609 storageClass="Image",
2610 ),
2611 collections=...,
2612 ),
2613 ["nonexistent"],
2614 ),
2615 (
2616 # No datasets of this type in this collection.
2617 registry.queryDatasets("flat", collections=["biases"]),
2618 ["flat", "biases"],
2619 ),
2620 (
2621 # No datasets of this type in this collection.
2622 base_query.findDatasets("flat", collections=["biases"]),
2623 ["flat", "biases"],
2624 ),
2625 (
2626 # No collections matching at all.
2627 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2628 ["potato"],
2629 ),
2630 ]
2631 # The behavior of these additional queries is slated to change in the
2632 # future, so we also check for deprecation warnings.
2633 with self.assertWarns(FutureWarning):
2634 queries_and_snippets.append(
2635 (
2636 # Dataset type name doesn't match any existing dataset
2637 # types.
2638 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2639 ["nonexistent"],
2640 )
2641 )
2642 with self.assertWarns(FutureWarning):
2643 queries_and_snippets.append(
2644 (
2645 # Dataset type name doesn't match any existing dataset
2646 # types.
2647 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2648 ["nonexistent"],
2649 )
2650 )
2651 for query, snippets in queries_and_snippets:
2652 self.assertFalse(query.any(execute=False, exact=False))
2653 self.assertFalse(query.any(execute=True, exact=False))
2654 self.assertFalse(query.any(execute=True, exact=True))
2655 self.assertEqual(query.count(exact=False), 0)
2656 self.assertEqual(query.count(exact=True), 0)
2657 messages = list(query.explain_no_results())
2658 self.assertTrue(messages)
2659 # Want all expected snippets to appear in at least one message.
2660 self.assertTrue(
2661 any(
2662 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2663 ),
2664 messages,
2665 )
2667 # This query does yield results, but should also emit a warning because
2668 # passing dataset type patterns to queryDataIds is deprecated; just
2669 # look for the warning.
2670 with self.assertWarns(FutureWarning):
2671 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2673 # These queries yield no results due to problems that can be identified
2674 # by cheap follow-up queries, yielding helpful diagnostics.
2675 for query, snippets in [
2676 (
2677 # No records for one of the involved dimensions.
2678 registry.queryDataIds(["subfilter"]),
2679 ["no rows", "subfilter"],
2680 ),
2681 (
2682 # No records for one of the involved dimensions.
2683 registry.queryDimensionRecords("subfilter"),
2684 ["no rows", "subfilter"],
2685 ),
2686 ]:
2687 self.assertFalse(query.any(execute=True, exact=False))
2688 self.assertFalse(query.any(execute=True, exact=True))
2689 self.assertEqual(query.count(exact=True), 0)
2690 messages = list(query.explain_no_results())
2691 self.assertTrue(messages)
2692 # Want all expected snippets to appear in at least one message.
2693 self.assertTrue(
2694 any(
2695 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2696 ),
2697 messages,
2698 )
2700 # This query yields four overlaps in the database, but one is filtered
2701 # out in postprocessing. The count queries aren't accurate because
2702 # they don't account for duplication that happens due to an internal
2703 # join against commonSkyPix.
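# (commonSkyPix is the skypix dimension used to implement spatial
# joins; a single visit-tract pair can be related through several of
# its pixels, so the raw join may contain duplicate rows.)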
2704 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2705 self.assertEqual(
2706 {
2707 DataCoordinate.standardize(
2708 instrument="Cam1",
2709 skymap="SkyMap1",
2710 visit=v,
2711 tract=t,
2712 universe=registry.dimensions,
2713 )
2714 for v, t in [(1, 0), (2, 0), (2, 1)]
2715 },
2716 set(query3),
2717 )
2718 self.assertTrue(query3.any(execute=False, exact=False))
2719 self.assertTrue(query3.any(execute=True, exact=False))
2720 self.assertTrue(query3.any(execute=True, exact=True))
2721 self.assertGreaterEqual(query3.count(exact=False), 4)
2722 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2723 self.assertFalse(list(query3.explain_no_results()))
2724 # This query yields overlaps in the database, but all are filtered
2725 # out in postprocessing. The count queries again aren't very useful.
2726 # We have to use `where=` here to avoid an optimization that
2727 # (currently) skips the spatial postprocess-filtering because it
2728 # recognizes that no spatial join is necessary. That's not ideal, but
2729 # fixing it is out of scope for this ticket.
2730 query4 = registry.queryDataIds(
2731 ["visit", "tract"],
2732 instrument="Cam1",
2733 skymap="SkyMap1",
2734 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2735 )
2736 self.assertFalse(set(query4))
2737 self.assertTrue(query4.any(execute=False, exact=False))
2738 self.assertTrue(query4.any(execute=True, exact=False))
2739 self.assertFalse(query4.any(execute=True, exact=True))
2740 self.assertGreaterEqual(query4.count(exact=False), 1)
2741 self.assertEqual(query4.count(exact=True, discard=True), 0)
2742 messages = list(query4.explain_no_results())
2743 self.assertTrue(messages)
2744 self.assertTrue(any("overlap" in message for message in messages))
2745 # This query should yield results from one dataset type but not the
2746 # other, which is not registered.
2747 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2748 self.assertTrue(set(query5))
2749 self.assertTrue(query5.any(execute=False, exact=False))
2750 self.assertTrue(query5.any(execute=True, exact=False))
2751 self.assertTrue(query5.any(execute=True, exact=True))
2752 self.assertGreaterEqual(query5.count(exact=False), 1)
2753 self.assertGreaterEqual(query5.count(exact=True), 1)
2754 self.assertFalse(list(query5.explain_no_results()))
2755 # This query applies a selection that yields no results, fully in the
2756 # database. Explaining why it fails involves traversing the relation
2757 # tree and running a LIMIT 1 query at each level that has the potential
2758 # to remove rows.
2759 query6 = registry.queryDimensionRecords(
2760 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2761 )
2762 self.assertEqual(query6.count(exact=True), 0)
2763 messages = list(query6.explain_no_results())
2764 self.assertTrue(messages)
2765 self.assertTrue(any("no-purpose" in message for message in messages))
2767 def testQueryDataIdsOrderBy(self):
2768 """Test order_by and limit on result returned by queryDataIds()."""
2769 registry = self.makeRegistry()
2770 self.loadData(registry, "base.yaml")
2771 self.loadData(registry, "datasets.yaml")
2772 self.loadData(registry, "spatial.yaml")
2774 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2775 return registry.queryDataIds(
2776 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2777 )
2779 Test = namedtuple(
2780 "testQueryDataIdsOrderByTest",
2781 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2782 defaults=(None, None, None),
2783 )
2785 test_data = (
2786 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2787 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2788 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2789 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2790 Test(
2791 "tract.id,visit.id",
2792 "tract,visit",
2793 ((0, 1), (0, 1), (0, 2)),
2794 limit=(3,),
2795 ),
2796 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2797 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2798 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2799 Test(
2800 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2801 ),
2802 Test(
2803 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2804 ),
2805 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2806 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2807 Test(
2808 "tract,-timespan.begin,timespan.end",
2809 "tract,visit",
2810 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2811 ),
2812 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2813 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2814 Test(
2815 "tract,detector",
2816 "tract,detector",
2817 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2818 datasets="flat",
2819 collections="imported_r",
2820 ),
2821 Test(
2822 "tract,detector.full_name",
2823 "tract,detector",
2824 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2825 datasets="flat",
2826 collections="imported_r",
2827 ),
2828 Test(
2829 "tract,detector.raft,detector.name_in_raft",
2830 "tract,detector",
2831 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2832 datasets="flat",
2833 collections="imported_r",
2834 ),
2835 )
2837 for test in test_data:
2838 order_by = test.order_by.split(",")
2839 keys = test.keys.split(",")
2840 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2841 if test.limit is not None:
2842 query = query.limit(*test.limit)
2843 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2844 self.assertEqual(dataIds, test.result)
2846 # and materialize
2847 query = do_query(keys).order_by(*order_by)
2848 if test.limit is not None:
2849 query = query.limit(*test.limit)
2850 with self.assertRaises(RelationalAlgebraError):
2851 with query.materialize():
2852 pass
2854 # errors in a name
2855 for order_by in ("", "-"):
2856 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2857 list(do_query().order_by(order_by))
2859 for order_by in ("undimension.name", "-undimension.name"):
2860 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2861 list(do_query().order_by(order_by))
2863 for order_by in ("attract", "-attract"):
2864 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2865 list(do_query().order_by(order_by))
2867 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2868 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2870 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2871 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2873 with self.assertRaisesRegex(
2874 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2875 ):
2876 list(do_query("tract").order_by("timespan.begin"))
2878 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2879 list(do_query("tract").order_by("tract.timespan.begin"))
2881 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2882 list(do_query("tract").order_by("tract.name"))
2884 def testQueryDataIdsGovernorExceptions(self):
2885 """Test exceptions raised by queryDataIds() for incorrect governors."""
2886 registry = self.makeRegistry()
2887 self.loadData(registry, "base.yaml")
2888 self.loadData(registry, "datasets.yaml")
2889 self.loadData(registry, "spatial.yaml")
2891 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
2892 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2894 Test = namedtuple(
2895 "testQueryDataIdExceptionsTest",
2896 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2897 defaults=(None, None, None, {}, None, 0),
2898 )
2900 test_data = (
2901 Test("tract,visit", count=6),
2902 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2903 Test(
2904 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2905 ),
2906 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2907 Test(
2908 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2909 ),
2910 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2911 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2912 Test(
2913 "tract,visit",
2914 where="instrument=cam AND skymap=map",
2915 bind={"cam": "Cam1", "map": "SkyMap1"},
2916 count=6,
2917 ),
2918 Test(
2919 "tract,visit",
2920 where="instrument=cam AND skymap=map",
2921 bind={"cam": "Cam", "map": "SkyMap"},
2922 exception=DataIdValueError,
2923 ),
2924 )
2926 for test in test_data:
2927 dimensions = test.dimensions.split(",")
2928 if test.exception:
2929 with self.assertRaises(test.exception):
2930 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2931 else:
2932 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2933 self.assertEqual(query.count(discard=True), test.count)
2935 # and materialize
2936 if test.exception:
2937 with self.assertRaises(test.exception):
2938 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2939 with query.materialize() as materialized:
2940 materialized.count(discard=True)
2941 else:
2942 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2943 with query.materialize() as materialized:
2944 self.assertEqual(materialized.count(discard=True), test.count)
2946 def testQueryDimensionRecordsOrderBy(self):
2947 """Test order_by and limit on result returned by
2948 queryDimensionRecords().
2949 """
2950 registry = self.makeRegistry()
2951 self.loadData(registry, "base.yaml")
2952 self.loadData(registry, "datasets.yaml")
2953 self.loadData(registry, "spatial.yaml")
2955 def do_query(element, datasets=None, collections=None):
2956 return registry.queryDimensionRecords(
2957 element, instrument="Cam1", datasets=datasets, collections=collections
2958 )
2960 query = do_query("detector")
2961 self.assertEqual(len(list(query)), 4)
2963 Test = namedtuple(
2964 "testQueryDataIdsOrderByTest",
2965 ("element", "order_by", "result", "limit", "datasets", "collections"),
2966 defaults=(None, None, None),
2967 )
2969 test_data = (
2970 Test("detector", "detector", (1, 2, 3, 4)),
2971 Test("detector", "-detector", (4, 3, 2, 1)),
2972 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2973 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2974 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2975 Test("visit", "visit", (1, 2)),
2976 Test("visit", "-visit.id", (2, 1)),
2977 Test("visit", "zenith_angle", (1, 2)),
2978 Test("visit", "-visit.name", (2, 1)),
2979 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2980 )
2982 for test in test_data:
2983 order_by = test.order_by.split(",")
2984 query = do_query(test.element).order_by(*order_by)
2985 if test.limit is not None:
2986 query = query.limit(*test.limit)
2987 dataIds = tuple(rec.id for rec in query)
2988 self.assertEqual(dataIds, test.result)
2990 # errors in a name
2991 for order_by in ("", "-"):
2992 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2993 list(do_query("detector").order_by(order_by))
2995 for order_by in ("undimension.name", "-undimension.name"):
2996 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
2997 list(do_query("detector").order_by(order_by))
2999 for order_by in ("attract", "-attract"):
3000 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3001 list(do_query("detector").order_by(order_by))
3003 def testQueryDimensionRecordsExceptions(self):
3004 """Test exceptions raised by queryDimensionRecords()."""
3005 registry = self.makeRegistry()
3006 self.loadData(registry, "base.yaml")
3007 self.loadData(registry, "datasets.yaml")
3008 self.loadData(registry, "spatial.yaml")
3010 result = registry.queryDimensionRecords("detector")
3011 self.assertEqual(result.count(), 4)
3012 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3013 self.assertEqual(result.count(), 4)
3014 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3015 self.assertEqual(result.count(), 4)
3016 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3017 self.assertEqual(result.count(), 4)
3018 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3019 self.assertEqual(result.count(), 4)
3021 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3022 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3023 result.count()
3025 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3026 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3027 result.count()
3029 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3030 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3031 result.count()
3033 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3034 result = registry.queryDimensionRecords(
3035 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3036 )
3037 result.count()
3039 def testDatasetConstrainedDimensionRecordQueries(self):
3040 """Test that queryDimensionRecords works even when given a dataset
3041 constraint whose dimensions extend beyond the requested dimension
3042 element's.
3043 """
3044 registry = self.makeRegistry()
3045 self.loadData(registry, "base.yaml")
3046 self.loadData(registry, "datasets.yaml")
3047 # Query for physical_filter dimension records, using a dataset that
3048 # has both physical_filter and dataset dimensions.
3049 records = registry.queryDimensionRecords(
3050 "physical_filter",
3051 datasets=["flat"],
3052 collections="imported_r",
3053 )
3054 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3055 # Trying to constrain by all dataset types is an error.
3056 with self.assertRaises(TypeError):
3057 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3059 def testSkyPixDatasetQueries(self):
3060 """Test that we can build queries involving skypix dimensions as long
3061 as a dataset type that uses those dimensions is included.
3062 """
3063 registry = self.makeRegistry()
3064 self.loadData(registry, "base.yaml")
3065 dataset_type = DatasetType(
3066 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3067 )
3068 registry.registerDatasetType(dataset_type)
3069 run = "r"
3070 registry.registerRun(run)
3071 # First try queries where there are no datasets; the concern is whether
3072 # we can even build and execute these queries without raising, even
3073 # when "doomed" query shortcuts are in play.
3074 self.assertFalse(
3075 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3076 )
3077 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3078 # Now add a dataset and see that we can get it back.
3079 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3080 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3081 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3082 self.assertEqual(
3083 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3084 {data_id},
3085 )
3086 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
3088 def testDatasetIdFactory(self):
3089 """Simple test for DatasetIdFactory, mostly to catch potential changes
3090 in its API.
3091 """
3092 registry = self.makeRegistry()
3093 factory = registry.datasetIdFactory
3094 dataset_type = DatasetType(
3095 "datasetType",
3096 dimensions=["detector", "instrument"],
3097 universe=registry.dimensions,
3098 storageClass="int",
3099 )
3100 run = "run"
3101 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
3103 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3104 self.assertIsInstance(datasetId, uuid.UUID)
3105 self.assertEqual(datasetId.version, 4)
3107 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3108 self.assertIsInstance(datasetId, uuid.UUID)
3109 self.assertEqual(datasetId.version, 5)
3111 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3112 self.assertIsInstance(datasetId, uuid.UUID)
3113 self.assertEqual(datasetId.version, 5)
3115 def testExposureQueries(self):
3116 """Test query methods using arguments sourced from the exposure log
3117 service.
3119 The most complete test dataset currently available to daf_butler tests
3120 is the hsc-rc2-subset.yaml export (which is unfortunately distinct
3121 from the lsst/rc2_subset GitHub repo), but it does not have 'exposure'
3122 dimension records as it was focused on providing nontrivial spatial
3123 overlaps between visit+detector and tract+patch. So in this test we
3124 need to translate queries that originally used the exposure dimension
3125 to use the (very similar) visit dimension instead.
3126 """
3127 registry = self.makeRegistry()
3128 self.loadData(registry, "hsc-rc2-subset.yaml")
3129 self.assertEqual(
3130 [
3131 record.id
3132 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3133 .order_by("id")
3134 .limit(5)
3135 ],
3136 [318, 322, 326, 330, 332],
3137 )
3138 self.assertEqual(
3139 [
3140 data_id["visit"]
3141 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3142 ],
3143 [318, 322, 326, 330, 332],
3144 )
3145 self.assertEqual(
3146 [
3147 record.id
3148 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3149 .order_by("full_name")
3150 .limit(5)
3151 ],
3152 [73, 72, 71, 70, 65],
3153 )
3154 self.assertEqual(
3155 [
3156 data_id["detector"]
3157 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3158 .order_by("full_name")
3159 .limit(5)
3160 ],
3161 [73, 72, 71, 70, 65],
3162 )
3164 def test_long_query_names(self) -> None:
3165 """Test that queries involving very long names are handled correctly.
3167 This is especially important for PostgreSQL, which truncates
3168 identifiers longer than 63 characters, but it's worth testing for all DBs.
3169 """
3170 registry = self.makeRegistry()
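# 4 characters * 17 repeats = 68 characters, comfortably past the
# 63-character identifier limit.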
3171 name = "abcd" * 17
3172 registry.registerDatasetType(
3173 DatasetType(
3174 name,
3175 dimensions=(),
3176 storageClass="Exposure",
3177 universe=registry.dimensions,
3178 )
3179 )
3180 # We need to search more than one collection that actually contains a
3181 # matching dataset; otherwise an optimization makes findFirst=True a
3182 # no-op and would sidestep any bugs caused by name truncation.
3183 run1 = "run1"
3184 registry.registerRun(run1)
3185 run2 = "run2"
3186 registry.registerRun(run2)
3187 (ref1,) = registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run1)
3188 registry.insertDatasets(name, [DataCoordinate.makeEmpty(registry.dimensions)], run2)
3189 self.assertEqual(
3190 set(registry.queryDatasets(name, collections=[run1, run2], findFirst=True)),
3191 {ref1},
3192 )
3194 def test_skypix_constraint_queries(self) -> None:
3195 """Test queries spatially constrained by a skypix data ID."""
3196 registry = self.makeRegistry()
3197 self.loadData(registry, "hsc-rc2-subset.yaml")
3198 patch_regions = {
3199 (data_id["tract"], data_id["patch"]): data_id.region
3200 for data_id in registry.queryDataIds(["patch"]).expanded()
3201 }
3202 skypix_dimension: SkyPixDimension = registry.dimensions["htm11"]
3203 # This check ensures the test doesn't become trivial due to a config
3204 # change; if it does, just pick a different HTM level.
3205 self.assertNotEqual(skypix_dimension, registry.dimensions.commonSkyPix)
3206 # Gather all skypix IDs that definitely overlap at least one of these
3207 # patches.
3208 relevant_skypix_ids = lsst.sphgeom.RangeSet()
3209 for patch_region in patch_regions.values():
3210 relevant_skypix_ids |= skypix_dimension.pixelization.interior(patch_region)
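# (Pixelization.interior returns the pixels wholly contained in the
# region, so every ID accumulated above is guaranteed to overlap its
# patch.)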
3211 # Look for a "nontrivial" skypix_id that overlaps at least one patch
3212 # and does not overlap at least one other patch.
3213 for skypix_id in itertools.chain.from_iterable(
3214 range(begin, end) for begin, end in relevant_skypix_ids
3215 ):
3216 skypix_region = skypix_dimension.pixelization.pixel(skypix_id)
3217 overlapping_patches = {
3218 patch_key
3219 for patch_key, patch_region in patch_regions.items()
3220 if not patch_region.isDisjointFrom(skypix_region)
3221 }
3222 if overlapping_patches and overlapping_patches != patch_regions.keys():
3223 break
3224 else:
3225 raise RuntimeError("Could not find usable skypix ID for this dimension configuration.")
3226 self.assertEqual(
3227 {
3228 (data_id["tract"], data_id["patch"])
3229 for data_id in registry.queryDataIds(
3230 ["patch"],
3231 dataId={skypix_dimension.name: skypix_id},
3232 )
3233 },
3234 overlapping_patches,
3235 )
3236 # Test that a three-way join that includes the common skypix system in
3237 # the dimensions doesn't generate redundant join terms in the query.
3238 full_data_ids = set(
3239 registry.queryDataIds(
3240 ["tract", "visit", "htm7"], skymap="hsc_rings_v1", instrument="HSC"
3241 ).expanded()
3242 )
3243 self.assertGreater(len(full_data_ids), 0)
3244 for data_id in full_data_ids:
3245 self.assertFalse(data_id.records["tract"].region.isDisjointFrom(data_id.records["htm7"].region))
3246 self.assertFalse(data_id.records["visit"].region.isDisjointFrom(data_id.records["htm7"].region))
3248 def test_spatial_constraint_queries(self) -> None:
3249 """Test queries in which one spatial dimension in the constraint (data
3250 ID or ``where`` string) constrains a different spatial dimension in the
3251 query result columns.
3252 """
3253 registry = self.makeRegistry()
3254 self.loadData(registry, "hsc-rc2-subset.yaml")
3255 patch_regions = {
3256 (data_id["tract"], data_id["patch"]): data_id.region
3257 for data_id in registry.queryDataIds(["patch"]).expanded()
3258 }
3259 observation_regions = {
3260 (data_id["visit"], data_id["detector"]): data_id.region
3261 for data_id in registry.queryDataIds(["visit", "detector"]).expanded()
3262 }
3263 all_combos = {
3264 (patch_key, observation_key)
3265 for patch_key, observation_key in itertools.product(patch_regions, observation_regions)
3266 }
3267 overlapping_combos = {
3268 (patch_key, observation_key)
3269 for patch_key, observation_key in all_combos
3270 if not patch_regions[patch_key].isDisjointFrom(observation_regions[observation_key])
3271 }
3272 # Check a direct spatial join with no constraint first.
3273 self.assertEqual(
3274 {
3275 ((data_id["tract"], data_id["patch"]), (data_id["visit"], data_id["detector"]))
3276 for data_id in registry.queryDataIds(["patch", "visit", "detector"])
3277 },
3278 overlapping_combos,
3279 )
3280 overlaps_by_patch: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3281 overlaps_by_observation: defaultdict[tuple[int, int], set[tuple[str, str]]] = defaultdict(set)
3282 for patch_key, observation_key in overlapping_combos:
3283 overlaps_by_patch[patch_key].add(observation_key)
3284 overlaps_by_observation[observation_key].add(patch_key)
3285 # Find patches and observations that overlap at least one of the other
3286 # but not all of the other.
3287 nontrivial_patch = next(
3288 iter(
3289 patch_key
3290 for patch_key, observation_keys in overlaps_by_patch.items()
3291 if observation_keys and observation_keys != observation_regions.keys()
3292 )
3293 )
3294 nontrivial_observation = next(
3295 iter(
3296 observation_key
3297 for observation_key, patch_keys in overlaps_by_observation.items()
3298 if patch_keys and patch_keys != patch_regions.keys()
3299 )
3300 )
3301 # Use the nontrivial patches and observations as constraints on the
3302 # other dimensions in various ways, first via a 'where' expression.
3303 # It's better in general to use 'bind' instead of f-strings, but these
3304 # are all integers, so there are no quoting concerns.
3305 self.assertEqual(
3306 {
3307 (data_id["visit"], data_id["detector"])
3308 for data_id in registry.queryDataIds(
3309 ["visit", "detector"],
3310 where=f"tract={nontrivial_patch[0]} AND patch={nontrivial_patch[1]}",
3311 skymap="hsc_rings_v1",
3312 )
3313 },
3314 overlaps_by_patch[nontrivial_patch],
3315 )
3316 self.assertEqual(
3317 {
3318 (data_id["tract"], data_id["patch"])
3319 for data_id in registry.queryDataIds(
3320 ["patch"],
3321 where=f"visit={nontrivial_observation[0]} AND detector={nontrivial_observation[1]}",
3322 instrument="HSC",
3323 )
3324 },
3325 overlaps_by_observation[nontrivial_observation],
3326 )
3327 # and then via the dataId argument.
3328 self.assertEqual(
3329 {
3330 (data_id["visit"], data_id["detector"])
3331 for data_id in registry.queryDataIds(
3332 ["visit", "detector"],
3333 dataId={
3334 "tract": nontrivial_patch[0],
3335 "patch": nontrivial_patch[1],
3336 },
3337 skymap="hsc_rings_v1",
3338 )
3339 },
3340 overlaps_by_patch[nontrivial_patch],
3341 )
3342 self.assertEqual(
3343 {
3344 (data_id["tract"], data_id["patch"])
3345 for data_id in registry.queryDataIds(
3346 ["patch"],
3347 dataId={
3348 "visit": nontrivial_observation[0],
3349 "detector": nontrivial_observation[1],
3350 },
3351 instrument="HSC",
3352 )
3353 },
3354 overlaps_by_observation[nontrivial_observation],
3355 )
3357 def test_query_projection_drop_postprocessing(self) -> None:
3358 """Test that projections and deduplications on query objects can
3359 drop post-query region filtering to ensure the query remains in
3360 the SQL engine.
3361 """
3362 registry = self.makeRegistry()
3363 self.loadData(registry, "base.yaml")
3364 self.loadData(registry, "spatial.yaml")
3366 def pop_transfer(tree: Relation) -> Relation:
3367 """If a relation tree terminates with a transfer to a new engine,
3368 return the relation prior to that transfer. If not, return the
3369 original relation.
3370 """
3371 match tree:
3372 case Transfer(target=target):
3373 return target
3374 case _:
3375 return tree
3377 # There's no public way to get a Query object yet, so we get one from a
3378 # DataCoordinateQueryResults private attribute. When a public API is
3379 # available this test should use it.
3380 query = registry.queryDataIds(["visit", "detector", "tract", "patch"])._query
3381 # We expect this query to terminate in the iteration engine originally,
3382 # because region-filtering is necessary.
3383 self.assertIsInstance(pop_transfer(query.relation).engine, iteration.Engine)
3384 # If we deduplicate, we usually have to do that downstream of the
3385 # filtering. That means the deduplication has to happen in the
3386 # iteration engine.
3387 self.assertIsInstance(pop_transfer(query.projected(unique=True).relation).engine, iteration.Engine)
3388 # If we pass drop_postprocessing, we instead drop the region filtering
3389 # so the deduplication can happen in SQL (though there might still be
3390 # transfer to iteration at the tail of the tree that we can ignore;
3391 # that's what the pop_transfer takes care of here).
3392 self.assertIsInstance(
3393 pop_transfer(query.projected(unique=True, drop_postprocessing=True).relation).engine,
3394 sql.Engine,
3395 )
3397 def test_query_empty_collections(self) -> None:
3398 """Test for registry query methods with empty collections. The methods
3399 should return empty result set (or None when applicable) and provide
3400 "doomed" diagnostics.
3401 """
3402 registry = self.makeRegistry()
3403 self.loadData(registry, "base.yaml")
3404 self.loadData(registry, "datasets.yaml")
3406 # Tests for registry.findDataset()
3407 with self.assertRaises(NoDefaultCollectionError):
3408 registry.findDataset("bias", instrument="Cam1", detector=1)
3409 self.assertIsNotNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=...))
3410 self.assertIsNone(registry.findDataset("bias", instrument="Cam1", detector=1, collections=[]))
3412 # Tests for registry.queryDatasets()
3413 with self.assertRaises(NoDefaultCollectionError):
3414 registry.queryDatasets("bias")
3415 self.assertTrue(list(registry.queryDatasets("bias", collections=...)))
3417 result = registry.queryDatasets("bias", collections=[])
3418 self.assertEqual(len(list(result)), 0)
3419 messages = list(result.explain_no_results())
3420 self.assertTrue(messages)
3421 self.assertTrue(any("because collection list is empty" in message for message in messages))
3423 # Tests for registry.queryDataIds()
3424 with self.assertRaises(NoDefaultCollectionError):
3425 registry.queryDataIds("detector", datasets="bias")
3426 self.assertTrue(list(registry.queryDataIds("detector", datasets="bias", collections=...)))
3428 result = registry.queryDataIds("detector", datasets="bias", collections=[])
3429 self.assertEqual(len(list(result)), 0)
3430 messages = list(result.explain_no_results())
3431 self.assertTrue(messages)
3432 self.assertTrue(any("because collection list is empty" in message for message in messages))
3434 # Tests for registry.queryDimensionRecords()
3435 with self.assertRaises(NoDefaultCollectionError):
3436 registry.queryDimensionRecords("detector", datasets="bias")
3437 self.assertTrue(list(registry.queryDimensionRecords("detector", datasets="bias", collections=...)))
3439 result = registry.queryDimensionRecords("detector", datasets="bias", collections=[])
3440 self.assertEqual(len(list(result)), 0)
3441 messages = list(result.explain_no_results())
3442 self.assertTrue(messages)
3443 self.assertTrue(any("because collection list is empty" in message for message in messages))