# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

import itertools
import logging
import os
import re
import unittest
import uuid
from abc import ABC, abstractmethod
from collections import defaultdict, namedtuple
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Iterator, Optional, Type, Union

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from lsst.daf.relation import RelationalAlgebraError

from ...core import (
    DataCoordinate,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    Timespan,
    ddl,
)
from .._collection_summary import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ArgumentError,
    CollectionError,
    CollectionTypeError,
    ConflictingDefinitionError,
    DataIdValueError,
    DatasetTypeError,
    InconsistentDataIdError,
    MissingCollectionError,
    MissingDatasetTypeError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError, DatasetIdGenEnum

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files."""
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        Returned instance will be pre-configured based on the values of class
        members, and default-configured for all other parameters.  Subclasses
        that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self, share_repo_with: Optional[Registry] = None) -> Optional[Registry]:
        """Return the Registry instance to be tested.

        Parameters
        ----------
        share_repo_with : `Registry`, optional
            If provided, the new registry should point to the same data
            repository as this existing registry.

        Returns
        -------
        registry : `Registry`
            New `Registry` instance, or `None` *only* if `share_repo_with` is
            not `None` and this test case does not support that argument
            (e.g. it is impossible with in-memory SQLite DBs).
        """
        raise NotImplementedError()
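
    # A concrete subclass might implement the two abstract methods along
    # these lines (a sketch, assuming an in-memory SQLite repository and the
    # ``Registry.createFromConfig`` factory; the ``"sqlite://"`` URI and data
    # directory are illustrative, and in-memory databases cannot support
    # ``share_repo_with``):
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self, share_repo_with=None):
    #             if share_repo_with is not None:
    #                 return None  # impossible with in-memory SQLite
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"
    #             return Registry.createFromConfig(config)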

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend

        with open(os.path.join(self.getDataDir(), filename), "r") as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def checkQueryResults(self, results, expected):
        """Check that a query results object contains expected values.

        Parameters
        ----------
        results : `DataCoordinateQueryResults` or `DatasetQueryResults`
            A lazy-evaluation query results object.
        expected : `list`
            A list of `DataCoordinate` or `DatasetRef` objects that should be
            equal to results of the query, aside from ordering.
        """
        self.assertCountEqual(list(results), expected)
        self.assertEqual(results.count(), len(expected))
        if expected:
            self.assertTrue(results.any())
        else:
            self.assertFalse(results.any())
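
    # Example use of the helper above (a sketch; the dataset type and
    # collection are placeholders for values a particular test registers):
    #
    #     results = registry.queryDatasets("bias", collections="imported_g")
    #     self.checkQueryResults(results, expected_refs)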

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            ),
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=(1, 3), name=("one", "two"))))
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=(1, 2, 3))))
        # Test a very long IN clause that exceeds the SQLite limit on the
        # number of parameters.  SQLite documents the limit as 32k, but it
        # appears to be much higher in practice.
        self.assertEqual(rows, list(registry.fetchOpaqueData(table, id=list(range(300_000)))))
        # Two IN clauses, each longer than the 1k batch size; the first has
        # duplicates, and the second has matching elements in different
        # batches (after sorting).
        self.assertEqual(
            rows[0:2],
            list(
                registry.fetchOpaqueData(
                    table,
                    id=list(range(1000)) + list(range(100, 0, -1)),
                    name=["one"] + [f"q{i}" for i in range(2200)] + ["two"],
                )
            ),
        )
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {
            "name": "DummyCam",
            "visit_max": 10,
            "visit_system": 0,
            "exposure_max": 10,
            "detector_max": 2,
            "class_name": "lsst.pipe.base.Instrument",
        }
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", graph=dimension.graph)
            .records[dimensionName]
            .toDict(),
            dimensionValue,
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(DataIdValueError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(instrument="DummyCam", physical_filter="DummyCam_i", graph=dimension2.graph)
            .records[dimensionName2]
            .toDict(),
            dimensionValue2,
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {
            "instrument": "DummyCam",
            "id": 1,
            "full_name": "one",
            "name_in_raft": "zero",
            "purpose": "SCIENCE",
        }
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {
                    "instrument": "DummyCam",
                    "id": 1,
                    "full_name": "one",
                    "name_in_raft": "four",
                    "purpose": "SCIENCE",
                },
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        (ref,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "tésτ"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        (inputRef,) = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        (inputRef1,) = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        (inputRef2,) = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
        # Search more than one collection, in which two have the right
        # dataset type and another does not.
        registry.registerRun("empty")
        self.loadData(registry, "datasets-uuid.yaml")
        bias1 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_g"])
        self.assertIsNotNone(bias1)
        bias2 = registry.findDataset("bias", instrument="Cam1", detector=2, collections=["imported_r"])
        self.assertIsNotNone(bias2)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "imported_r"]
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_r", "imported_g"]
            ),
        )
        # Search more than one collection, with one of them a CALIBRATION
        # collection.
        registry.registerCollection("Cam1/calib", CollectionType.CALIBRATION)
        timespan = Timespan(
            begin=astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai"),
            end=astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai"),
        )
        registry.certify("Cam1/calib", [bias2], timespan=timespan)
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "imported_g", "Cam1/calib"],
                timespan=timespan,
            ),
        )
        self.assertEqual(
            bias2,
            registry.findDataset(
                "bias",
                instrument="Cam1",
                detector=2,
                collections=["empty", "Cam1/calib", "imported_g"],
                timespan=timespan,
            ),
        )
        # If we try to search those same collections without a timespan, the
        # first search works, since the CALIBRATION collection is irrelevant
        # after the dataset is found in the first collection.  But the second
        # one should raise.
        self.assertEqual(
            bias1,
            registry.findDataset(
                "bias", instrument="Cam1", detector=2, collections=["empty", "imported_g", "Cam1/calib"]
            ),
        )
        with self.assertRaises(TypeError):
            self.assertEqual(
                bias2,
                registry.findDataset(
                    "bias", instrument="Cam1", detector=2, collections=["empty", "Cam1/calib", "imported_g"]
                ),
            )

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(MissingDatasetTypeError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testImportDatasetsUUID(self):
        """Test for `Registry._importDatasets` with UUID dataset ID."""
        if not self.datasetsManager.endswith(".ByDimensionsDatasetRecordStorageManagerUUID"):
            self.skipTest(f"Unexpected dataset manager {self.datasetsManager}")

        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        for run in range(6):
            registry.registerRun(f"run{run}")
        datasetTypeBias = registry.getDatasetType("bias")
        datasetTypeFlat = registry.getDatasetType("flat")
        dataIdBias1 = {"instrument": "Cam1", "detector": 1}
        dataIdBias2 = {"instrument": "Cam1", "detector": 2}
        dataIdFlat1 = {"instrument": "Cam1", "detector": 1, "physical_filter": "Cam1-G", "band": "g"}

        dataset_id = uuid.uuid4()
        ref = DatasetRef(datasetTypeBias, dataIdBias1, id=dataset_id, run="run0")
        (ref1,) = registry._importDatasets([ref])
        # UUID is used without change
        self.assertEqual(ref.id, ref1.id)

        # All different failure modes
        refs = (
            # Importing the same DatasetRef with a different dataset ID is an
            # error
            DatasetRef(datasetTypeBias, dataIdBias1, id=uuid.uuid4(), run="run0"),
            # Same DatasetId but different DataId
            DatasetRef(datasetTypeBias, dataIdBias2, id=ref1.id, run="run0"),
            DatasetRef(datasetTypeFlat, dataIdFlat1, id=ref1.id, run="run0"),
            # Same DatasetRef and DatasetId but different run
            DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run="run1"),
        )
        for ref in refs:
            with self.assertRaises(ConflictingDefinitionError):
                registry._importDatasets([ref])

        # Test for non-unique IDs; they can be re-imported multiple times.
        for run, idGenMode in ((2, DatasetIdGenEnum.DATAID_TYPE), (4, DatasetIdGenEnum.DATAID_TYPE_RUN)):
            with self.subTest(idGenMode=idGenMode):
                # Use integer dataset ID to force UUID calculation in _import
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run}")
                (ref1,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                self.assertIsInstance(ref1.id, uuid.UUID)
                self.assertEqual(ref1.id.version, 5)

                # Importing it again is OK
                (ref2,) = registry._importDatasets([ref1])
                self.assertEqual(ref2.id, ref1.id)

                # Cannot import to a different run with the same ID
                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=ref1.id, run=f"run{run+1}")
                with self.assertRaises(ConflictingDefinitionError):
                    registry._importDatasets([ref])

                ref = DatasetRef(datasetTypeBias, dataIdBias1, id=0, run=f"run{run+1}")
                if idGenMode is DatasetIdGenEnum.DATAID_TYPE:
                    # Cannot import the same DATAID_TYPE ref into a new run
                    with self.assertRaises(ConflictingDefinitionError):
                        (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)
                else:
                    # A DATAID_TYPE_RUN ref can be imported into a new run
                    (ref2,) = registry._importDatasets([ref], idGenerationMode=idGenMode)

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes()).names)
        self.assertEqual({"bias", "flat"}, NamedValueSet(registry.queryDatasetTypes(components=False)).names)
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "flat", "bias.wcs", "flat.photoCalib"},
                NamedValueSet(registry.queryDatasetTypes(components=True)).names,
            )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual({"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names)
        self.assertEqual(
            {"bias"}, NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        with self.assertWarns(FutureWarning):
            self.assertLess(
                {"bias", "bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names,
            )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"}, NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
            )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names,
        )
        with self.assertWarns(FutureWarning):
            self.assertEqual(
                {"bias.wcs"},
                NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names,
            )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={
                "data1": registry.storageClasses.getStorageClass("StructuredDataDict"),
                "data2": registry.storageClasses.getStorageClass("StructuredDataDict"),
            },
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType(
            "temporary",
            dimensions=["instrument"],
            storageClass=tempStorageClass,
            universe=registry.dimensions,
        )
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertWarns(FutureWarning):
            with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
                everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)
        # Querying with no components should not warn at all.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*"), components=False))
            # Must issue a warning of our own to be captured.
            logging.getLogger("lsst.daf.butler.registries").warning("test message")
        self.assertEqual(len(cm.output), 1)
        self.assertIn("test message", cm.output[0])

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.

        All of the behavior here is deprecated, so many of these tests are
        currently wrapped in a context to check that we get a warning whenever
        a component dataset is actually returned.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(
            parentType, collections=collection, instrument="Cam1", detector=1
        )
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection, dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        with self.assertWarns(FutureWarning):
            dataIds = registry.queryDataIds(
                ["detector"],
                datasets=["bias.wcs"],
                collections=collection,
            ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            ),
        )
        # Search for multiple datasets of a single type with queryDatasets.
        with self.assertWarns(FutureWarning):
            childRefs2 = set(
                registry.queryDatasets(
                    "bias.wcs",
                    collections=collection,
                )
            )
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2}, {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections."""
        registry = self.makeRegistry()
        other_registry = self.makeRegistry(share_repo_with=registry)
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        # Check that we can query for old and new collections by type.
        self.assertEqual(set(registry.queryCollections(collectionTypes=CollectionType.RUN)), {run1, run2})
        self.assertEqual(
            set(registry.queryCollections(collectionTypes={CollectionType.TAGGED, CollectionType.RUN})),
            {tag1, run1, run2},
        )
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(list(registry.getCollectionChain(chain1)), [tag1, run2])
        self.assertEqual(registry.getCollectionParentChains(tag1), {chain1})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1})
        # Refresh the other registry that points to the same repo, and make
        # sure it can see the things we've done (note that this does require
        # an explicit refresh(); that's the documented behavior, because
        # caching is ~impossible otherwise).
        if other_registry is not None:
            other_registry.refresh()
            self.assertEqual(list(other_registry.getCollectionChain(chain1)), [tag1, run2])
            self.assertEqual(other_registry.getCollectionParentChains(tag1), {chain1})
            self.assertEqual(other_registry.getCollectionParentChains(run2), {chain1})
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        self.assertEqual(registry.getCollectionParentChains(chain1), {chain2})
        self.assertEqual(registry.getCollectionParentChains(run2), {chain1, chain2})
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"],
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"],
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  It should be found in chain2 as
        # well, because chain2 searches run2 directly.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option."""
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(DataIdValueError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument", dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        for i in range(1, 6):
            registry.insertDimensionData(
                "visit_detector_region",
                dict(instrument="DummyCam", visit=10, detector=i),
                dict(instrument="DummyCam", visit=11, detector=i),
                dict(instrument="DummyCam", visit=20, detector=i),
            )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(
            name="RAW",
            dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(
            name="CALEXP",
            dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                (ref,) = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions, dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(
                packer1.unpack(packer1.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer1.dimensions),
            )
            self.assertEqual(
                packer2.unpack(packer2.pack(dataId)),
                DataCoordinate.standardize(dataId, graph=packer2.dimensions),
            )
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4 * 3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6 * 3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit = 10", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="visit = 10 and detector > 1 and 'DummyCam'=instrument",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # queryDataIds with only one of `datasets` and `collections` is an
        # error.
        with self.assertRaises(CollectionError):
            registry.queryDataIds(dimensions, datasets=rawType)
        with self.assertRaises(ArgumentError):
            registry.queryDataIds(dimensions, collections=run1)

        # expression excludes everything
        rows = registry.queryDataIds(
            dimensions, datasets=rawType, collections=run1, where="visit > 1000", instrument="DummyCam"
        ).toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(
            dimensions,
            datasets=rawType,
            collections=run1,
            where="physical_filter = 'dummy_r'",
            instrument="DummyCam",
        ).toSet()
        self.assertEqual(len(rows), 2 * 3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test; we want
        # "band" in the test so we also have to add physical_filter
        # dimensions
        registry.insertDimensionData("instrument", dict(instrument="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData("skymap", dict(name="DummyMap", hash="sha!".encode("utf8")))
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0) for patch in range(10)],
            )

        # dataset types
        run = "tésτ"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(
            name="deepCoadd_calexp",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(
            name="deepCoadd_mergeDet",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(mergeType)
        measType = DatasetType(
            name="deepCoadd_meas",
            dimensions=registry.dimensions.extract(("skymap", "tract", "patch", "band")),
            storageClass=storageClass,
        )
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(
                calexpType.dimensions.required | mergeType.dimensions.required | measType.dimensions.required
            ),
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3 * 4 * 2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(
            dimensions,
            datasets=[calexpType, mergeType],
            collections=run,
            where="tract IN (1, 5) AND patch IN (2, 7)",
            skymap="DummyMap",
        ).toSet()
        self.assertEqual(len(rows), 2 * 2 * 2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(
            dimensions, datasets=[calexpType, mergeType], collections=run, where="band = 'i'"
        ).toSet()
        self.assertEqual(len(rows), 3 * 4 * 1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Specifying a non-existing skymap is an exception
        with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
            rows = registry.queryDataIds(
                dimensions, datasets=[calexpType, mergeType], collections=run, where="skymap = 'Mars'"
            ).toSet()
1240 def testSpatialJoin(self):
1241 """Test queries that involve spatial overlap joins."""
1242 registry = self.makeRegistry()
1243 self.loadData(registry, "hsc-rc2-subset.yaml")
1245 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
1246 # the TopologicalFamily they belong to. We'll relate all elements in
1247 # each family to all of the elements in each other family.
1248 families = defaultdict(set)
1249 # Dictionary of {element.name: {dataId: region}}.
1250 regions = {}
1251 for element in registry.dimensions.getDatabaseElements():
1252 if element.spatial is not None:
1253 families[element.spatial.name].add(element)
1254 regions[element.name] = {
1255 record.dataId: record.region for record in registry.queryDimensionRecords(element)
1256 }
1258 # If this check fails, it's not necessarily a problem - it may just be
1259 # a reasonable change to the default dimension definitions - but the
1260 # test below depends on there being more than one family to do anything
1261 # useful.
1262 self.assertEqual(len(families), 2)
1264 # Overlap DatabaseDimensionElements with each other.
1265 for family1, family2 in itertools.combinations(families, 2):
1266 for element1, element2 in itertools.product(families[family1], families[family2]):
1267 graph = DimensionGraph.union(element1.graph, element2.graph)
1268 # Construct expected set of overlapping data IDs via a
1269 # brute-force comparison of the regions we've already fetched.
1270 expected = {
1271 DataCoordinate.standardize({**dataId1.byName(), **dataId2.byName()}, graph=graph)
1272 for (dataId1, region1), (dataId2, region2) in itertools.product(
1273 regions[element1.name].items(), regions[element2.name].items()
1274 )
1275 if not region1.isDisjointFrom(region2)
1276 }
1277 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1278 queried = set(registry.queryDataIds(graph))
1279 self.assertEqual(expected, queried)
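# A minimal sketch of the brute-force overlap predicate used to build
# `expected` above, assuming r1 and r2 are lsst.sphgeom region objects:
#
#     overlaps = not r1.isDisjointFrom(r2)
#
# The registry query is expected to reproduce exactly this relation.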
1281 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1282 commonSkyPix = registry.dimensions.commonSkyPix
1283 for elementName, regions in regions.items():
1284 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1285 expected = set()
1286 for dataId, region in regions.items():
1287 for begin, end in commonSkyPix.pixelization.envelope(region):
1288 expected.update(
1289 DataCoordinate.standardize({commonSkyPix.name: index, **dataId.byName()}, graph=graph)
1290 for index in range(begin, end)
1291 )
1292 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1293 queried = set(registry.queryDataIds(graph))
1294 self.assertEqual(expected, queried)
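# For reference: `pixelization.envelope(region)` returns a
# lsst.sphgeom.RangeSet of half-open [begin, end) index ranges, which is
# why the loop above expands each range with Python's range(begin, end).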
1296 def testAbstractQuery(self):
1297 """Test that we can run a query that just lists the known
1298 bands. This is tricky because band is
1299 backed by a query against physical_filter.
1300 """
1301 registry = self.makeRegistry()
1302 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1303 registry.insertDimensionData(
1304 "physical_filter",
1305 dict(instrument="DummyCam", name="dummy_i", band="i"),
1306 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1307 dict(instrument="DummyCam", name="dummy_r", band="r"),
1308 )
1309 rows = registry.queryDataIds(["band"]).toSet()
1310 self.assertCountEqual(
1311 rows,
1312 [
1313 DataCoordinate.standardize(band="i", universe=registry.dimensions),
1314 DataCoordinate.standardize(band="r", universe=registry.dimensions),
1315 ],
1316 )
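# Note that dummy_i and dummy_i2 share band "i", so the query must
# deduplicate: each band appears once even though it is backed by
# multiple physical_filter rows.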
1318 def testAttributeManager(self):
1319 """Test basic functionality of attribute manager."""
1320 # Number of attribute records with schema versions in a fresh database:
1321 # 6 managers with 3 records per manager, plus the config for dimensions.
1322 VERSION_COUNT = 6 * 3 + 1
1324 registry = self.makeRegistry()
1325 attributes = registry._managers.attributes
1327 # check what get() returns for non-existing key
1328 self.assertIsNone(attributes.get("attr"))
1329 self.assertEqual(attributes.get("attr", ""), "")
1330 self.assertEqual(attributes.get("attr", "Value"), "Value")
1331 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1333 # cannot store empty key or value
1334 with self.assertRaises(ValueError):
1335 attributes.set("", "value")
1336 with self.assertRaises(ValueError):
1337 attributes.set("attr", "")
1339 # set value of non-existing key
1340 attributes.set("attr", "value")
1341 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1342 self.assertEqual(attributes.get("attr"), "value")
1344 # update value of existing key
1345 with self.assertRaises(ButlerAttributeExistsError):
1346 attributes.set("attr", "value2")
1348 attributes.set("attr", "value2", force=True)
1349 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1350 self.assertEqual(attributes.get("attr"), "value2")
1352 # delete existing key
1353 self.assertTrue(attributes.delete("attr"))
1354 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1356 # delete non-existing key
1357 self.assertFalse(attributes.delete("non-attr"))
1359 # store a bunch of keys and get the list back
1360 data = [
1361 ("version.core", "1.2.3"),
1362 ("version.dimensions", "3.2.1"),
1363 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1364 ]
1365 for key, value in data:
1366 attributes.set(key, value)
1367 items = dict(attributes.items())
1368 for key, value in data:
1369 self.assertEqual(items[key], value)
1371 def testQueryDatasetsDeduplication(self):
1372 """Test that the findFirst option to queryDatasets selects datasets
1373 from collections in the order given.
1374 """
1375 registry = self.makeRegistry()
1376 self.loadData(registry, "base.yaml")
1377 self.loadData(registry, "datasets.yaml")
1378 self.assertCountEqual(
1379 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1380 [
1381 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1382 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1383 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1384 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1385 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1386 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1387 ],
1388 )
1389 self.assertCountEqual(
1390 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"], findFirst=True)),
1391 [
1392 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1393 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1394 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1395 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1396 ],
1397 )
1398 self.assertCountEqual(
1399 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"], findFirst=True)),
1400 [
1401 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1402 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1403 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1404 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1405 ],
1406 )
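# In other words, findFirst=True resolves each data ID to the dataset in
# the first collection (in the order given) that contains one: detector=1
# exists only in imported_g and detector=4 only in imported_r, so they are
# returned either way, while detectors 2 and 3 follow the collection order.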
1408 def testQueryResults(self):
1409 """Test querying for data IDs and then manipulating the QueryResults
1410 object returned to perform other queries.
1411 """
1412 registry = self.makeRegistry()
1413 self.loadData(registry, "base.yaml")
1414 self.loadData(registry, "datasets.yaml")
1415 bias = registry.getDatasetType("bias")
1416 flat = registry.getDatasetType("flat")
1417 # Obtain expected results from methods other than those we're testing
1418 # here. That includes:
1419 # - the dimensions of the data IDs we want to query:
1420 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1421 # - the dimensions of some other data IDs we'll extract from that:
1422 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1423 # - the data IDs we expect to obtain from the first queries:
1424 expectedDataIds = DataCoordinateSet(
1425 {
1426 DataCoordinate.standardize(
1427 instrument="Cam1", detector=d, physical_filter=p, universe=registry.dimensions
1428 )
1429 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1430 },
1431 graph=expectedGraph,
1432 hasFull=False,
1433 hasRecords=False,
1434 )
1435 # - the flat datasets we expect to find from those data IDs, in just
1436 # one collection (so deduplication is irrelevant):
1437 expectedFlats = [
1438 registry.findDataset(
1439 flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1", collections="imported_r"
1440 ),
1441 registry.findDataset(
1442 flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1", collections="imported_r"
1443 ),
1444 registry.findDataset(
1445 flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2", collections="imported_r"
1446 ),
1447 ]
1448 # - the data IDs we expect to extract from that:
1449 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1450 # - the bias datasets we expect to find from those data IDs, after we
1451 # subset-out the physical_filter dimension, both with duplicates:
1452 expectedAllBiases = [
1453 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1454 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1455 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1456 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1457 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1458 ]
1459 # - ...and without duplicates:
1460 expectedDeduplicatedBiases = [
1461 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1462 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1463 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1464 ]
1465 # Test against those expected results, using a "lazy" query for the
1466 # data IDs (which re-executes that query each time we use it to do
1467 # something new).
1468 dataIds = registry.queryDataIds(
1469 ["detector", "physical_filter"],
1470 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1471 instrument="Cam1",
1472 )
1473 self.assertEqual(dataIds.graph, expectedGraph)
1474 self.assertEqual(dataIds.toSet(), expectedDataIds)
1475 self.assertCountEqual(
1476 list(
1477 dataIds.findDatasets(
1478 flat,
1479 collections=["imported_r"],
1480 )
1481 ),
1482 expectedFlats,
1483 )
1484 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1485 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1486 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1487 self.assertCountEqual(
1488 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False)),
1489 expectedAllBiases,
1490 )
1491 self.assertCountEqual(
1492 list(subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)),
1493 expectedDeduplicatedBiases,
1494 )
1496 # Check dimensions match.
1497 with self.assertRaises(ValueError):
1498 subsetDataIds.findDatasets("flat", collections=["imported_r", "imported_g"], findFirst=True)
1500 # Use a component dataset type.
1501 self.assertCountEqual(
1502 [
1503 ref.makeComponentRef("image")
1504 for ref in subsetDataIds.findDatasets(
1505 bias,
1506 collections=["imported_r", "imported_g"],
1507 findFirst=False,
1508 )
1509 ],
1510 [ref.makeComponentRef("image") for ref in expectedAllBiases],
1511 )
1513 # Use a named dataset type that does not exist and a dataset type
1514 # object that does not exist.
1515 unknown_type = DatasetType("not_known", dimensions=bias.dimensions, storageClass="Exposure")
1517 # Test both string name and dataset type object.
1518 test_type: Union[str, DatasetType]
1519 for test_type, test_type_name in (
1520 (unknown_type, unknown_type.name),
1521 (unknown_type.name, unknown_type.name),
1522 ):
1523 with self.assertRaisesRegex(DatasetTypeError, expected_regex=test_type_name):
1524 list(
1525 subsetDataIds.findDatasets(
1526 test_type, collections=["imported_r", "imported_g"], findFirst=True
1527 )
1528 )
1530 # Materialize the bias dataset queries (only) by putting the results
1531 # into temporary tables, then repeat those tests.
1532 with subsetDataIds.findDatasets(
1533 bias, collections=["imported_r", "imported_g"], findFirst=False
1534 ).materialize() as biases:
1535 self.assertCountEqual(list(biases), expectedAllBiases)
1536 with subsetDataIds.findDatasets(
1537 bias, collections=["imported_r", "imported_g"], findFirst=True
1538 ).materialize() as biases:
1539 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1540 # Materialize the data ID subset query, but not the dataset queries.
1541 with subsetDataIds.materialize() as subsetDataIds:
1542 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1543 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1544 self.assertCountEqual(
1545 list(
1546 subsetDataIds.findDatasets(
1547 bias, collections=["imported_r", "imported_g"], findFirst=False
1548 )
1549 ),
1550 expectedAllBiases,
1551 )
1552 self.assertCountEqual(
1553 list(
1554 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1555 ),
1556 expectedDeduplicatedBiases,
1557 )
1558 # Materialize the dataset queries, too.
1559 with subsetDataIds.findDatasets(
1560 bias, collections=["imported_r", "imported_g"], findFirst=False
1561 ).materialize() as biases:
1562 self.assertCountEqual(list(biases), expectedAllBiases)
1563 with subsetDataIds.findDatasets(
1564 bias, collections=["imported_r", "imported_g"], findFirst=True
1565 ).materialize() as biases:
1566 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1567 # Materialize the original query, but none of the follow-up queries.
1568 with dataIds.materialize() as dataIds:
1569 self.assertEqual(dataIds.graph, expectedGraph)
1570 self.assertEqual(dataIds.toSet(), expectedDataIds)
1571 self.assertCountEqual(
1572 list(
1573 dataIds.findDatasets(
1574 flat,
1575 collections=["imported_r"],
1576 )
1577 ),
1578 expectedFlats,
1579 )
1580 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1581 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1582 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1583 self.assertCountEqual(
1584 list(
1585 subsetDataIds.findDatasets(
1586 bias, collections=["imported_r", "imported_g"], findFirst=False
1587 )
1588 ),
1589 expectedAllBiases,
1590 )
1591 self.assertCountEqual(
1592 list(
1593 subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True)
1594 ),
1595 expectedDeduplicatedBiases,
1596 )
1597 # Materialize just the bias dataset queries.
1598 with subsetDataIds.findDatasets(
1599 bias, collections=["imported_r", "imported_g"], findFirst=False
1600 ).materialize() as biases:
1601 self.assertCountEqual(list(biases), expectedAllBiases)
1602 with subsetDataIds.findDatasets(
1603 bias, collections=["imported_r", "imported_g"], findFirst=True
1604 ).materialize() as biases:
1605 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1606 # Materialize the subset data ID query, but not the dataset
1607 # queries.
1608 with subsetDataIds.materialize() as subsetDataIds:
1609 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1610 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1611 self.assertCountEqual(
1612 list(
1613 subsetDataIds.findDatasets(
1614 bias, collections=["imported_r", "imported_g"], findFirst=False
1615 )
1616 ),
1617 expectedAllBiases,
1618 )
1619 self.assertCountEqual(
1620 list(
1621 subsetDataIds.findDatasets(
1622 bias, collections=["imported_r", "imported_g"], findFirst=True
1623 )
1624 ),
1625 expectedDeduplicatedBiases,
1626 )
1627 # Materialize the bias dataset queries, too, so now we're
1628 # materializing every single step.
1629 with subsetDataIds.findDatasets(
1630 bias, collections=["imported_r", "imported_g"], findFirst=False
1631 ).materialize() as biases:
1632 self.assertCountEqual(list(biases), expectedAllBiases)
1633 with subsetDataIds.findDatasets(
1634 bias, collections=["imported_r", "imported_g"], findFirst=True
1635 ).materialize() as biases:
1636 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
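# The pattern exercised above, schematically:
#
#     with results.materialize() as materialized:
#         ...  # follow-up queries run against a temporary table
#
# Materialization is purely an optimization; every combination of
# materialized and lazy steps is expected to yield identical results.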
1638 def testStorageClassPropagation(self):
1639 """Test that queries for datasets respect the storage class passed in
1640 as part of a full dataset type.
1641 """
1642 registry = self.makeRegistry()
1643 self.loadData(registry, "base.yaml")
1644 dataset_type_in_registry = DatasetType(
1645 "tbl", dimensions=["instrument"], storageClass="DataFrame", universe=registry.dimensions
1646 )
1647 registry.registerDatasetType(dataset_type_in_registry)
1648 run = "run1"
1649 registry.registerRun(run)
1650 (inserted_ref,) = registry.insertDatasets(
1651 dataset_type_in_registry, [registry.expandDataId(instrument="Cam1")], run=run
1652 )
1653 self.assertEqual(inserted_ref.datasetType, dataset_type_in_registry)
1654 query_dataset_type = DatasetType(
1655 "tbl", dimensions=["instrument"], storageClass="ArrowAstropy", universe=registry.dimensions
1656 )
1657 self.assertNotEqual(dataset_type_in_registry, query_dataset_type)
1658 query_datasets_result = registry.queryDatasets(query_dataset_type, collections=[run])
1659 self.assertEqual(query_datasets_result.parentDatasetType, query_dataset_type) # type: ignore
1660 (query_datasets_ref,) = query_datasets_result
1661 self.assertEqual(query_datasets_ref.datasetType, query_dataset_type)
1662 query_data_ids_find_datasets_result = registry.queryDataIds(["instrument"]).findDatasets(
1663 query_dataset_type, collections=[run]
1664 )
1665 self.assertEqual(query_data_ids_find_datasets_result.parentDatasetType, query_dataset_type)
1666 (query_data_ids_find_datasets_ref,) = query_data_ids_find_datasets_result
1667 self.assertEqual(query_data_ids_find_datasets_ref.datasetType, query_dataset_type)
1668 query_dataset_types_result = registry.queryDatasetTypes(query_dataset_type)
1669 self.assertEqual(list(query_dataset_types_result), [query_dataset_type])
1670 find_dataset_ref = registry.findDataset(query_dataset_type, instrument="Cam1", collections=[run])
1671 self.assertEqual(find_dataset_ref.datasetType, query_dataset_type)
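# To summarize the behavior tested above: passing a DatasetType whose
# storage class differs from the registered one overrides the storage
# class on every ref the query returns, e.g.
#
#     (ref,) = registry.queryDatasets(query_dataset_type, collections=[run])
#     assert ref.datasetType == query_dataset_type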
1673 def testEmptyDimensionsQueries(self):
1674 """Test Query and QueryResults objects in the case where there are no
1675 dimensions.
1676 """
1677 # Set up test data: one dataset type, two runs, one dataset in each.
1678 registry = self.makeRegistry()
1679 self.loadData(registry, "base.yaml")
1680 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1681 registry.registerDatasetType(schema)
1682 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1683 run1 = "run1"
1684 run2 = "run2"
1685 registry.registerRun(run1)
1686 registry.registerRun(run2)
1687 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1688 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1689 # Query directly for both of the datasets, and each one, one at a time.
1690 self.checkQueryResults(
1691 registry.queryDatasets(schema, collections=[run1, run2], findFirst=False), [dataset1, dataset2]
1692 )
1693 self.checkQueryResults(
1694 registry.queryDatasets(schema, collections=[run1, run2], findFirst=True),
1695 [dataset1],
1696 )
1697 self.checkQueryResults(
1698 registry.queryDatasets(schema, collections=[run2, run1], findFirst=True),
1699 [dataset2],
1700 )
1701 # Query for data IDs with no dimensions.
1702 dataIds = registry.queryDataIds([])
1703 self.checkQueryResults(dataIds, [dataId])
1704 # Use queried data IDs to find the datasets.
1705 self.checkQueryResults(
1706 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1707 [dataset1, dataset2],
1708 )
1709 self.checkQueryResults(
1710 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1711 [dataset1],
1712 )
1713 self.checkQueryResults(
1714 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1715 [dataset2],
1716 )
1717 # Now materialize the data ID query results and repeat those tests.
1718 with dataIds.materialize() as dataIds:
1719 self.checkQueryResults(dataIds, [dataId])
1720 self.checkQueryResults(
1721 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1722 [dataset1],
1723 )
1724 self.checkQueryResults(
1725 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1726 [dataset2],
1727 )
1728 # Query for non-empty data IDs, then subset that to get the empty one.
1729 # Repeat the above tests starting from that.
1730 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1731 self.checkQueryResults(dataIds, [dataId])
1732 self.checkQueryResults(
1733 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1734 [dataset1, dataset2],
1735 )
1736 self.checkQueryResults(
1737 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1738 [dataset1],
1739 )
1740 self.checkQueryResults(
1741 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1742 [dataset2],
1743 )
1744 with dataIds.materialize() as dataIds:
1745 self.checkQueryResults(dataIds, [dataId])
1746 self.checkQueryResults(
1747 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1748 [dataset1, dataset2],
1749 )
1750 self.checkQueryResults(
1751 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1752 [dataset1],
1753 )
1754 self.checkQueryResults(
1755 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1756 [dataset2],
1757 )
1758 # Query for non-empty data IDs, then materialize, then subset to get
1759 # the empty one. Repeat again.
1760 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1761 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1762 self.checkQueryResults(dataIds, [dataId])
1763 self.checkQueryResults(
1764 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1765 [dataset1, dataset2],
1766 )
1767 self.checkQueryResults(
1768 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1769 [dataset1],
1770 )
1771 self.checkQueryResults(
1772 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1773 [dataset2],
1774 )
1775 with dataIds.materialize() as dataIds:
1776 self.checkQueryResults(dataIds, [dataId])
1777 self.checkQueryResults(
1778 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False),
1779 [dataset1, dataset2],
1780 )
1781 self.checkQueryResults(
1782 dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True),
1783 [dataset1],
1784 )
1785 self.checkQueryResults(
1786 dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True),
1787 [dataset2],
1788 )
1789 # Query for non-empty data IDs with a constraint on an empty-data-ID
1790 # dataset that exists.
1791 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=...)
1792 self.checkQueryResults(
1793 dataIds.subset(unique=True),
1794 [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)],
1795 )
1796 # Again query for non-empty data IDs with a constraint on empty-data-ID
1797 # datasets, but when the datasets don't exist. We delete the existing
1798 # dataset and query just that collection rather than creating a new
1799 # empty collection because this is a bit less likely for our build-time
1800 # logic to shortcut-out (via the collection summaries), and such a
1801 # shortcut would make this test a bit more trivial than we'd like.
1802 registry.removeDatasets([dataset2])
1803 dataIds = registry.queryDataIds(["instrument"], datasets="schema", collections=run2)
1804 self.checkQueryResults(dataIds, [])
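# For reference: DataCoordinate.makeEmpty(universe) constructs the unique
# data ID with no dimensions, which is why a single `dataId` value covers
# all of the expected data ID results above.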
1806 def testDimensionDataModifications(self):
1807 """Test that modifying dimension records via:
1808 syncDimensionData(..., update=True) and
1809 insertDimensionData(..., replace=True) works as expected, even in the
1810 presence of datasets using those dimensions and spatial overlap
1811 relationships.
1812 """
1814 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1815 """Unpack a sphgeom.RangeSet into the integers it contains."""
1816 for begin, end in ranges:
1817 yield from range(begin, end)
1819 def range_set_hull(
1820 ranges: lsst.sphgeom.RangeSet,
1821 pixelization: lsst.sphgeom.HtmPixelization,
1822 ) -> lsst.sphgeom.ConvexPolygon:
1823 """Create a ConvexPolygon hull of the region defined by a set of
1824 HTM pixelization index ranges.
1825 """
1826 points = []
1827 for index in unpack_range_set(ranges):
1828 points.extend(pixelization.triangle(index).getVertices())
1829 return lsst.sphgeom.ConvexPolygon(points)
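# For example, assuming HTM trixel numbering (trixel i at one level has
# children 4*i .. 4*i + 3 at the next):
#
#     lsst.sphgeom.RangeSet(12288).scaled(4)  # ranges [49152, 49156)
#
# i.e. the four child trixels of trixel 12288, as used just below.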
1831 # Use HTM to set up an initial parent region (one arbitrary trixel)
1832 # and four child regions (the trixels within the parent at the next
1833 # level). We'll use the parent as a tract/visit region and the children
1834 # as its patch/visit_detector regions.
1835 registry = self.makeRegistry()
1836 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1837 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1838 index = 12288
1839 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
1840 assert htm6.universe().contains(child_ranges_small)
1841 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1842 parent_region_small = lsst.sphgeom.ConvexPolygon(
1843 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1844 )
1845 assert all(parent_region_small.contains(c) for c in child_regions_small)
1846 # Make a larger version of each child region, defined to be the set of
1847 # htm6 trixels that overlap the original's bounding circle. Make a new
1848 # parent that's the convex hull of the new children.
1849 child_regions_large = [
1850 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6) for c in child_regions_small
1851 ]
1852 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1853 parent_region_large = lsst.sphgeom.ConvexPolygon(
1854 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1855 )
1856 assert all(parent_region_large.contains(c) for c in child_regions_large)
1857 assert parent_region_large.contains(parent_region_small)
1858 assert not parent_region_small.contains(parent_region_large)
1859 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1860 # Find some commonSkyPix indices that overlap the large regions but do
1861 # not overlap the small regions. We use commonSkyPix here to make sure the
1862 # real tests later involve what's in the database, not just post-query
1863 # filtering of regions.
1864 child_difference_indices = []
1865 for large, small in zip(child_regions_large, child_regions_small):
1866 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1867 assert difference, "if this is empty, we can't test anything useful with these regions"
1868 assert all(
1869 not commonSkyPix.triangle(d).isDisjointFrom(large)
1870 and commonSkyPix.triangle(d).isDisjointFrom(small)
1871 for d in difference
1872 )
1873 child_difference_indices.append(difference)
1874 parent_difference_indices = list(
1875 unpack_range_set(
1876 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1877 )
1878 )
1879 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1880 assert all(
1881 (
1882 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1883 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1884 )
1885 for d in parent_difference_indices
1886 )
1887 # Now that we've finally got those regions, we'll insert the large ones
1888 # as tract/patch dimension records.
1889 skymap_name = "testing_v1"
1890 registry.insertDimensionData(
1891 "skymap",
1892 {
1893 "name": skymap_name,
1894 "hash": bytes([42]),
1895 "tract_max": 1,
1896 "patch_nx_max": 2,
1897 "patch_ny_max": 2,
1898 },
1899 )
1900 registry.insertDimensionData("tract", {"skymap": skymap_name, "id": 0, "region": parent_region_large})
1901 registry.insertDimensionData(
1902 "patch",
1903 *[
1904 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1905 for n, c in enumerate(child_regions_large)
1906 ],
1907 )
1908 # Add a dataset that uses these dimensions to make sure that modifying
1909 # them doesn't disrupt foreign keys (need to make sure DB doesn't
1910 # implement insert with replace=True as delete-then-insert).
1911 dataset_type = DatasetType(
1912 "coadd",
1913 dimensions=["tract", "patch"],
1914 universe=registry.dimensions,
1915 storageClass="Exposure",
1916 )
1917 registry.registerDatasetType(dataset_type)
1918 registry.registerCollection("the_run", CollectionType.RUN)
1919 registry.insertDatasets(
1920 dataset_type,
1921 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1922 run="the_run",
1923 )
1924 # Query for tracts and patches that overlap some "difference" commonSkyPix
1925 # pixels; there should be overlaps, because the database has
1926 # the "large" suite of regions.
1927 self.assertEqual(
1928 {0},
1929 {
1930 data_id["tract"]
1931 for data_id in registry.queryDataIds(
1932 ["tract"],
1933 skymap=skymap_name,
1934 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1935 )
1936 },
1937 )
1938 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1939 self.assertIn(
1940 patch_id,
1941 {
1942 data_id["patch"]
1943 for data_id in registry.queryDataIds(
1944 ["patch"],
1945 skymap=skymap_name,
1946 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1947 )
1948 },
1949 )
1950 # Use sync to update the tract region and insert to update the regions
1951 # of the patches, to the "small" suite.
1952 updated = registry.syncDimensionData(
1953 "tract",
1954 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1955 update=True,
1956 )
1957 self.assertEqual(updated, {"region": parent_region_large})
1958 registry.insertDimensionData(
1959 "patch",
1960 *[
1961 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
1962 for n, c in enumerate(child_regions_small)
1963 ],
1964 replace=True,
1965 )
1966 # Query again; there now should be no such overlaps, because the
1967 # database has the "small" suite of regions.
1968 self.assertFalse(
1969 set(
1970 registry.queryDataIds(
1971 ["tract"],
1972 skymap=skymap_name,
1973 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1974 )
1975 )
1976 )
1977 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1978 self.assertNotIn(
1979 patch_id,
1980 {
1981 data_id["patch"]
1982 for data_id in registry.queryDataIds(
1983 ["patch"],
1984 skymap=skymap_name,
1985 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1986 )
1987 },
1988 )
1989 # Update back to the large regions and query one more time.
1990 updated = registry.syncDimensionData(
1991 "tract",
1992 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1993 update=True,
1994 )
1995 self.assertEqual(updated, {"region": parent_region_small})
1996 registry.insertDimensionData(
1997 "patch",
1998 *[
1999 {"skymap": skymap_name, "tract": 0, "id": n, "cell_x": n % 2, "cell_y": n // 2, "region": c}
2000 for n, c in enumerate(child_regions_large)
2001 ],
2002 replace=True,
2003 )
2004 self.assertEqual(
2005 {0},
2006 {
2007 data_id["tract"]
2008 for data_id in registry.queryDataIds(
2009 ["tract"],
2010 skymap=skymap_name,
2011 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
2012 )
2013 },
2014 )
2015 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
2016 self.assertIn(
2017 patch_id,
2018 {
2019 data_id["patch"]
2020 for data_id in registry.queryDataIds(
2021 ["patch"],
2022 skymap=skymap_name,
2023 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
2024 )
2025 },
2026 )
2028 def testCalibrationCollections(self):
2029 """Test operations on `~CollectionType.CALIBRATION` collections,
2030 including `Registry.certify`, `Registry.decertify`, and
2031 `Registry.findDataset`.
2032 """
2033 # Setup - make a Registry, fill it with some datasets in
2034 # non-calibration collections.
2035 registry = self.makeRegistry()
2036 self.loadData(registry, "base.yaml")
2037 self.loadData(registry, "datasets.yaml")
2038 # Set up some timestamps.
2039 t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
2040 t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
2041 t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
2042 t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
2043 t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
2044 allTimespans = [
2045 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
2046 ]
2047 # Get references to some datasets.
2048 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
2049 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
2050 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
2051 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
2052 # Register the main calibration collection we'll be working with.
2053 collection = "Cam1/calibs/default"
2054 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
2055 # Cannot associate into a calibration collection (no timespan).
2056 with self.assertRaises(CollectionTypeError):
2057 registry.associate(collection, [bias2a])
2058 # Certify 2a dataset with [t2, t4) validity.
2059 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
2060 # Test that we can query for this dataset via the new collection, both
2061 # on its own and with a RUN collection, as long as we don't try to join
2062 # in temporal dimensions or use findFirst=True.
2063 self.assertEqual(
2064 set(registry.queryDatasets("bias", findFirst=False, collections=collection)),
2065 {bias2a},
2066 )
2067 self.assertEqual(
2068 set(registry.queryDatasets("bias", findFirst=False, collections=[collection, "imported_r"])),
2069 {
2070 bias2a,
2071 bias2b,
2072 bias3b,
2073 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2074 },
2075 )
2076 self.assertEqual(
2077 set(registry.queryDataIds("detector", datasets="bias", collections=collection)),
2078 {registry.expandDataId(instrument="Cam1", detector=2)},
2079 )
2080 self.assertEqual(
2081 set(registry.queryDataIds("detector", datasets="bias", collections=[collection, "imported_r"])),
2082 {
2083 registry.expandDataId(instrument="Cam1", detector=2),
2084 registry.expandDataId(instrument="Cam1", detector=3),
2085 registry.expandDataId(instrument="Cam1", detector=4),
2086 },
2087 )
2089 # We should not be able to certify 2b with anything overlapping that
2090 # window.
2091 with self.assertRaises(ConflictingDefinitionError):
2092 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
2093 with self.assertRaises(ConflictingDefinitionError):
2094 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
2095 with self.assertRaises(ConflictingDefinitionError):
2096 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
2097 with self.assertRaises(ConflictingDefinitionError):
2098 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
2099 with self.assertRaises(ConflictingDefinitionError):
2100 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
2101 with self.assertRaises(ConflictingDefinitionError):
2102 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
2103 with self.assertRaises(ConflictingDefinitionError):
2104 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
2105 with self.assertRaises(ConflictingDefinitionError):
2106 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
2107 # We should be able to certify 3a with a range overlapping that window,
2108 # because it's for a different detector.
2109 # We'll certify 3a over [t1, t3).
2110 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
2111 # Now we'll certify 2b and 3b together over [t4, ∞).
2112 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
2114 # Fetch all associations and check that they are what we expect.
2115 self.assertCountEqual(
2116 list(
2117 registry.queryDatasetAssociations(
2118 "bias",
2119 collections=[collection, "imported_g", "imported_r"],
2120 )
2121 ),
2122 [
2123 DatasetAssociation(
2124 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
2125 collection="imported_g",
2126 timespan=None,
2127 ),
2128 DatasetAssociation(
2129 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
2130 collection="imported_r",
2131 timespan=None,
2132 ),
2133 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
2134 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
2135 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
2136 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
2137 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
2138 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
2139 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2140 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
2141 ],
2142 )
2144 class Ambiguous:
2145 """Tag class to denote lookups that should be ambiguous."""
2147 pass
2149 def assertLookup(
2150 detector: int, timespan: Timespan, expected: Optional[Union[DatasetRef, Type[Ambiguous]]]
2151 ) -> None:
2152 """Local function that asserts that a bias lookup returns the given
2153 expected result.
2154 """
2155 if expected is Ambiguous:
2156 with self.assertRaises((DatasetTypeError, LookupError)):
2157 registry.findDataset(
2158 "bias",
2159 collections=collection,
2160 instrument="Cam1",
2161 detector=detector,
2162 timespan=timespan,
2163 )
2164 else:
2165 self.assertEqual(
2166 expected,
2167 registry.findDataset(
2168 "bias",
2169 collections=collection,
2170 instrument="Cam1",
2171 detector=detector,
2172 timespan=timespan,
2173 ),
2174 )
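# Reminder of the conventions behind the lookups below: Timespan(a, b) is
# half-open [a, b), a None bound is unbounded, so Timespan(None, None)
# covers (-inf, inf); a lookup is "ambiguous" when the query timespan
# overlaps more than one certified validity range for the same data ID.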
2176 # Systematically test lookups against expected results.
2177 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2178 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2179 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2180 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2181 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
2182 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2183 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2184 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2185 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2186 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
2187 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2188 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2189 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2190 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
2191 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2192 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
2193 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
2194 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
2195 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
2196 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2197 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2198 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2199 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2200 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2201 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2202 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
2203 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2204 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2205 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2206 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2207 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
2208 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2209 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2210 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2211 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
2212 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2213 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2214 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
2215 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2216 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
2217 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2218 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2220 # Decertify [t3, t5) for all data IDs, and do test lookups again.
2221 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
2222 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
2223 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
2224 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
2225 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
2226 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
2227 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
2228 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
2229 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
2230 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
2231 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
2232 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
2233 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
2234 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
2235 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
2236 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
2237 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
2238 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
2239 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
2240 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
2241 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
2242 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
2243 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
2244 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
2245 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
2246 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
2247 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
2248 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
2249 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
2250 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
2251 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
2252 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
2253 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
2254 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
2255 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
2256 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
2257 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
2258 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
2259 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
2260 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
2261 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
2262 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
2263 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
2264 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
2265 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
2267 # Decertify everything, this time with explicit data IDs, then check
2268 # that no lookups succeed.
2269 registry.decertify(
2270 collection,
2271 "bias",
2272 Timespan(None, None),
2273 dataIds=[
2274 dict(instrument="Cam1", detector=2),
2275 dict(instrument="Cam1", detector=3),
2276 ],
2277 )
2278 for detector in (2, 3):
2279 for timespan in allTimespans:
2280 assertLookup(detector=detector, timespan=timespan, expected=None)
2281 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
2282 # those.
2283 registry.certify(
2284 collection,
2285 [bias2a, bias3a],
2286 Timespan(None, None),
2287 )
2288 for timespan in allTimespans:
2289 assertLookup(detector=2, timespan=timespan, expected=bias2a)
2290 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2291 # Decertify just bias2 over [t2, t4).
2292 # This should split a single certification row into two (and leave the
2293 # other existing row, for bias3a, alone).
2294 registry.decertify(
2295 collection, "bias", Timespan(t2, t4), dataIds=[dict(instrument="Cam1", detector=2)]
2296 )
2297 for timespan in allTimespans:
2298 assertLookup(detector=3, timespan=timespan, expected=bias3a)
2299 overlapsBefore = timespan.overlaps(Timespan(None, t2))
2300 overlapsAfter = timespan.overlaps(Timespan(t4, None))
2301 if overlapsBefore and overlapsAfter:
2302 expected = Ambiguous
2303 elif overlapsBefore or overlapsAfter:
2304 expected = bias2a
2305 else:
2306 expected = None
2307 assertLookup(detector=2, timespan=timespan, expected=expected)
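# Schematically, decertifying [t2, t4) out of a (-inf, inf) certification
# leaves two validity rows, (-inf, t2) and [t4, inf), which is exactly
# what the overlapsBefore/overlapsAfter logic above encodes.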
2309 def testSkipCalibs(self):
2310 """Test how queries handle skipping of calibration collections."""
2311 registry = self.makeRegistry()
2312 self.loadData(registry, "base.yaml")
2313 self.loadData(registry, "datasets.yaml")
2315 coll_calib = "Cam1/calibs/default"
2316 registry.registerCollection(coll_calib, type=CollectionType.CALIBRATION)
2318 # Add all biases to the calibration collection.
2319 # Without this, the logic that prunes dataset subqueries based on
2320 # datasetType-collection summary information will fire before the logic
2321 # we want to test below. This is a good thing (it avoids the dreaded
2322 # NotImplementedError a bit more often) everywhere but here.
2323 registry.certify(coll_calib, registry.queryDatasets("bias", collections=...), Timespan(None, None))
2325 coll_list = [coll_calib, "imported_g", "imported_r"]
2326 chain = "Cam1/chain"
2327 registry.registerCollection(chain, type=CollectionType.CHAINED)
2328 registry.setCollectionChain(chain, coll_list)
2330 # explicit list will raise if findFirst=True or there are temporal
2331 # dimensions
2332 with self.assertRaises(NotImplementedError):
2333 registry.queryDatasets("bias", collections=coll_list, findFirst=True)
2334 with self.assertRaises(NotImplementedError):
2335 registry.queryDataIds(
2336 ["instrument", "detector", "exposure"], datasets="bias", collections=coll_list
2337 ).count()
2339 # chain will skip
2340 datasets = list(registry.queryDatasets("bias", collections=chain))
2341 self.assertGreater(len(datasets), 0)
2343 dataIds = list(registry.queryDataIds(["instrument", "detector"], datasets="bias", collections=chain))
2344 self.assertGreater(len(dataIds), 0)
2346 # glob will skip too
2347 datasets = list(registry.queryDatasets("bias", collections="*d*"))
2348 self.assertGreater(len(datasets), 0)
2350 # regular expression will skip too
2351 pattern = re.compile(".*")
2352 datasets = list(registry.queryDatasets("bias", collections=pattern))
2353 self.assertGreater(len(datasets), 0)
2355 # ellipsis should work as usual
2356 datasets = list(registry.queryDatasets("bias", collections=...))
2357 self.assertGreater(len(datasets), 0)
2359 # a few tests with findFirst
2360 datasets = list(registry.queryDatasets("bias", collections=chain, findFirst=True))
2361 self.assertGreater(len(datasets), 0)
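# In short: when a calibration collection is only reached via a chain,
# glob, regex, or ellipsis, queries that cannot search it (findFirst or
# temporal joins) skip it; naming it in an explicit list raises
# NotImplementedError.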
2363 def testIngestTimeQuery(self):
2364 registry = self.makeRegistry()
2365 self.loadData(registry, "base.yaml")
2366 dt0 = datetime.utcnow()
2367 self.loadData(registry, "datasets.yaml")
2368 dt1 = datetime.utcnow()
2370 datasets = list(registry.queryDatasets(..., collections=...))
2371 len0 = len(datasets)
2372 self.assertGreater(len0, 0)
2374 where = "ingest_date > T'2000-01-01'"
2375 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2376 len1 = len(datasets)
2377 self.assertEqual(len0, len1)
2379 # no one will ever use this piece of software in 30 years
2380 where = "ingest_date > T'2050-01-01'"
2381 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2382 len2 = len(datasets)
2383 self.assertEqual(len2, 0)
2385 # Check more exact timing to make sure there is no 37 seconds offset
2386 # (after fixing DM-30124). SQLite time precision is 1 second, so make
2387 # sure that we don't test with higher precision.
2388 tests = [
2389 # format: (timestamp, operator, expected_len)
2390 (dt0 - timedelta(seconds=1), ">", len0),
2391 (dt0 - timedelta(seconds=1), "<", 0),
2392 (dt1 + timedelta(seconds=1), "<", len0),
2393 (dt1 + timedelta(seconds=1), ">", 0),
2394 ]
2395 for dt, op, expect_len in tests:
2396 dt_str = dt.isoformat(sep=" ")
2398 where = f"ingest_date {op} T'{dt_str}'"
2399 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2400 self.assertEqual(len(datasets), expect_len)
2402 # same with bind using datetime or astropy Time
2403 where = f"ingest_date {op} ingest_time"
2404 datasets = list(
2405 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt})
2406 )
2407 self.assertEqual(len(datasets), expect_len)
2409 dt_astropy = astropy.time.Time(dt, format="datetime")
2410 datasets = list(
2411 registry.queryDatasets(..., collections=..., where=where, bind={"ingest_time": dt_astropy})
2412 )
2413 self.assertEqual(len(datasets), expect_len)
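# To recap the two ways a time can enter an expression, as exercised
# above: an inline literal, e.g. where="ingest_date > T'2050-01-01'", or
# a bind value, e.g. where="ingest_date > ingest_time" with
# bind={"ingest_time": dt}, where dt may be a naive datetime or an
# astropy.time.Time.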
2415 def testTimespanQueries(self):
2416 """Test query expressions involving timespans."""
2417 registry = self.makeRegistry()
2418 self.loadData(registry, "hsc-rc2-subset.yaml")
2419 # All visits in the database; mapping from ID to timespan.
2420 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2421 # Just those IDs, sorted (which is also temporal sorting, because HSC
2422 # visit IDs are monotonically increasing).
2423 ids = sorted(visits.keys())
2424 self.assertGreater(len(ids), 20)
2425 # Pick some quasi-random indexes into `ids` to play with.
2426 i1 = int(len(ids) * 0.1)
2427 i2 = int(len(ids) * 0.3)
2428 i3 = int(len(ids) * 0.6)
2429 i4 = int(len(ids) * 0.8)
2430 # Extract some times from those: just before the beginning of i1 (which
2431 # should be after the end of the previous visit), exactly the
2432 # beginning of i2, just after the beginning of i3 (and before its end),
2433 # and the exact end of i4.
2434 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2435 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2436 t2 = visits[ids[i2]].begin
2437 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2438 self.assertLess(t3, visits[ids[i3]].end)
2439 t4 = visits[ids[i4]].end
2440 # Make sure those are actually in order.
2441 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2443 bind = {
2444 "t1": t1,
2445 "t2": t2,
2446 "t3": t3,
2447 "t4": t4,
2448 "ts23": Timespan(t2, t3),
2449 }
2451 def query(where):
2452 """Helper function that queries for visit data IDs and returns
2453 results as a sorted, deduplicated list of visit IDs.
2454 """
2455 return sorted(
2456 {
2457 dataId["visit"]
2458 for dataId in registry.queryDataIds("visit", instrument="HSC", bind=bind, where=where)
2459 }
2460 )
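# The expression forms exercised below, for reference: a timespan can be
# written inline as "(t1, t2)" from bind values or T'...' literals, bound
# whole (ts23), or compared against a scalar time; OVERLAPS, < and > all
# treat timespans as half-open [begin, end).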
2462 # Try a bunch of timespan queries, mixing up the bounds themselves,
2463 # where they appear in the expression, and how we get the timespan into
2464 # the expression.
2466 # t1 is before the start of i1, so this should not include i1.
2467 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2468 # t2 is exactly at the start of i2, but ends are exclusive, so these
2469 # should not include i2.
2470 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2471 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2472 # t3 is in the middle of i3, so this should include i3.
2473 self.assertEqual(ids[i2 : i3 + 1], query("visit.timespan OVERLAPS ts23"))
2474 # This one should not include i3 by the same reasoning.
2475 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > (t1, t3)"))
2476 # t4 is exactly at the end of i4, so this should include i4.
2477 self.assertEqual(ids[i3 : i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2478 # i4's upper bound of t4 is exclusive so this should not include i4.
2479 self.assertEqual(ids[i4 + 1 :], query("visit.timespan OVERLAPS (t4, NULL)"))
2481 # Now some timespan vs. time scalar queries.
2482 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2483 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2484 self.assertEqual(ids[i3 + 1 :], query("visit.timespan > t3"))
2485 self.assertEqual(ids[i3 + 1 :], query("t3 < visit.timespan"))
2486 self.assertEqual(ids[i3 : i3 + 1], query("visit.timespan OVERLAPS t3"))
2487 self.assertEqual(ids[i3 : i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2489 # Empty timespans should not overlap anything.
2490 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2492 def testCollectionSummaries(self):
2493 """Test recording and retrieval of collection summaries."""
2494 self.maxDiff = None
2495 registry = self.makeRegistry()
2496 # Importing datasets from yaml should go through the code path where
2497 # we update collection summaries as we insert datasets.
2498 self.loadData(registry, "base.yaml")
2499 self.loadData(registry, "datasets.yaml")
2500 flat = registry.getDatasetType("flat")
2501 expected1 = CollectionSummary()
2502 expected1.dataset_types.add(registry.getDatasetType("bias"))
2503 expected1.add_data_ids(
2504 flat, [DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)]
2505 )
2506 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2507 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2508 # Create a chained collection with both of the imported runs; the
2509 # summary should be the same, because it's a union with itself.
2510 chain = "chain"
2511 registry.registerCollection(chain, CollectionType.CHAINED)
2512 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2513 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2514 # Associate flats only into a tagged collection and a calibration
2515 # collection to check summaries of those.
2516 tag = "tag"
2517 registry.registerCollection(tag, CollectionType.TAGGED)
2518 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2519 calibs = "calibs"
2520 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2521 registry.certify(
2522 calibs, registry.queryDatasets(flat, collections="imported_g"), timespan=Timespan(None, None)
2523 )
2524 expected2 = expected1.copy()
2525 expected2.dataset_types.discard("bias")
2526 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2527 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2528 # Explicitly calling Registry.refresh() should load those same
2529 # summaries, via a totally different code path.
2530 registry.refresh()
2531 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2532 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2533 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2534 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
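# For reference: a CollectionSummary records which dataset types a
# collection contains and which governor dimension values (here just
# instrument="Cam1") appear in their data IDs; that is what expected1 and
# expected2 model above.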
2536 def testBindInQueryDatasets(self):
2537 """Test that the bind parameter is correctly forwarded in
2538 queryDatasets recursion.
2539 """
2540 registry = self.makeRegistry()
2541 # Importing datasets from yaml should go through the code path where
2542 # we update collection summaries as we insert datasets.
2543 self.loadData(registry, "base.yaml")
2544 self.loadData(registry, "datasets.yaml")
2545 self.assertEqual(
2546 set(registry.queryDatasets("flat", band="r", collections=...)),
2547 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2548 )
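    # A short usage sketch of the bind pattern verified above: identifiers in
    # the where string are resolved from the bind mapping, avoiding
    # hand-quoted literals. It assumes only the queryDatasets signature used
    # in this test; the helper name is hypothetical.
    def _sketchBind(self, registry):
        return set(
            registry.queryDatasets(
                "flat",
                where="band = my_band AND instrument = my_instrument",
                bind={"my_band": "r", "my_instrument": "Cam1"},
                collections=...,
            )
        )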
2550 def testQueryIntRangeExpressions(self):
2551 """Test integer range expressions in ``where`` arguments.
2553 Note that our expressions use inclusive stop values, unlike Python's.
2554 """
2555 registry = self.makeRegistry()
2556 self.loadData(registry, "base.yaml")
2557 self.assertEqual(
2558 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..2)")),
2559 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 2]},
2560 )
2561 self.assertEqual(
2562 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (1..4:2)")),
2563 {registry.expandDataId(instrument="Cam1", detector=n) for n in [1, 3]},
2564 )
2565 self.assertEqual(
2566 set(registry.queryDataIds(["detector"], instrument="Cam1", where="detector IN (2..4:2)")),
2567 {registry.expandDataId(instrument="Cam1", detector=n) for n in [2, 4]},
2568 )
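    # A hedged reference sketch for the range syntax checked above: "a..b" is
    # inclusive of b, and "a..b:s" adds a stride, so the Python equivalent is
    # range(a, b + 1, s). The helper is hypothetical, for illustration only.
    @staticmethod
    def _pythonEquivalentRange(a, b, s=1):
        return list(range(a, b + 1, s))  # e.g. (1..4:2) matches [1, 3]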
2570 def testQueryResultSummaries(self):
2571 """Test summary methods like `count`, `any`, and `explain_no_results`
2572 on `DataCoordinateQueryResults` and `DatasetQueryResults`.
2573 """
2574 registry = self.makeRegistry()
2575 self.loadData(registry, "base.yaml")
2576 self.loadData(registry, "datasets.yaml")
2577 self.loadData(registry, "spatial.yaml")
2578 # Default test dataset has two collections, each with both flats and
2579 # biases. Add a new collection with only biases.
2580 registry.registerCollection("biases", CollectionType.TAGGED)
2581 registry.associate("biases", registry.queryDatasets("bias", collections=["imported_g"]))
2582 # First query yields two results, and involves no postprocessing.
2583 query1 = registry.queryDataIds(["physical_filter"], band="r")
2584 self.assertTrue(query1.any(execute=False, exact=False))
2585 self.assertTrue(query1.any(execute=True, exact=False))
2586 self.assertTrue(query1.any(execute=True, exact=True))
2587 self.assertEqual(query1.count(exact=False), 2)
2588 self.assertEqual(query1.count(exact=True), 2)
2589 self.assertFalse(list(query1.explain_no_results()))
2590 # Second query should yield no results, which we should see when
2591 # we attempt to expand the data ID.
2592 query2 = registry.queryDataIds(["physical_filter"], band="h")
2593 # There's no execute=False, exact=False test here because the behavior
2594 # is not something we want to guarantee in this case (and exact=False
2595 # says either answer is legal).
2596 self.assertFalse(query2.any(execute=True, exact=False))
2597 self.assertFalse(query2.any(execute=True, exact=True))
2598 self.assertEqual(query2.count(exact=False), 0)
2599 self.assertEqual(query2.count(exact=True), 0)
2600 self.assertTrue(list(query2.explain_no_results()))
2601 # These queries yield no results due to various problems that can be
2602 # spotted prior to execution, yielding helpful diagnostics.
2603 base_query = registry.queryDataIds(["detector", "physical_filter"])
2604 queries_and_snippets = [
2605 (
2606 # Dataset type name doesn't match any existing dataset types.
2607 registry.queryDatasets("nonexistent", collections=...),
2608 ["nonexistent"],
2609 ),
2610 (
2611 # Dataset type object isn't registered.
2612 registry.queryDatasets(
2613 DatasetType(
2614 "nonexistent",
2615 dimensions=["instrument"],
2616 universe=registry.dimensions,
2617 storageClass="Image",
2618 ),
2619 collections=...,
2620 ),
2621 ["nonexistent"],
2622 ),
2623 (
2624 # No datasets of this type in this collection.
2625 registry.queryDatasets("flat", collections=["biases"]),
2626 ["flat", "biases"],
2627 ),
2628 (
2629 # No datasets of this type in this collection.
2630 base_query.findDatasets("flat", collections=["biases"]),
2631 ["flat", "biases"],
2632 ),
2633 (
2634 # No collections matching at all.
2635 registry.queryDatasets("flat", collections=re.compile("potato.+")),
2636 ["potato"],
2637 ),
2638 ]
2639 # The behavior of these additional queries is slated to change in the
2640 # future, so we also check for deprecation warnings.
2641 with self.assertWarns(FutureWarning):
2642 queries_and_snippets.append(
2643 (
2644 # Dataset type name doesn't match any existing dataset
2645 # types.
2646 registry.queryDataIds(["detector"], datasets=["nonexistent"], collections=...),
2647 ["nonexistent"],
2648 )
2649 )
2650 with self.assertWarns(FutureWarning):
2651 queries_and_snippets.append(
2652 (
2653 # Dataset type name doesn't match any existing dataset
2654 # types.
2655 registry.queryDimensionRecords("detector", datasets=["nonexistent"], collections=...),
2656 ["nonexistent"],
2657 )
2658 )
2659 for query, snippets in queries_and_snippets:
2660 self.assertFalse(query.any(execute=False, exact=False))
2661 self.assertFalse(query.any(execute=True, exact=False))
2662 self.assertFalse(query.any(execute=True, exact=True))
2663 self.assertEqual(query.count(exact=False), 0)
2664 self.assertEqual(query.count(exact=True), 0)
2665 messages = list(query.explain_no_results())
2666 self.assertTrue(messages)
2667 # Want all expected snippets to appear in at least one message.
2668 self.assertTrue(
2669 any(
2670 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2671 ),
2672 messages,
2673 )
2675 # This query does yield results, but should also emit a warning because
2676 # passing dataset type patterns to queryDataIds is deprecated; just look
2677 # for the warning.
2678 with self.assertWarns(FutureWarning):
2679 registry.queryDataIds(["detector"], datasets=re.compile("^nonexistent$"), collections=...)
2681 # These queries yield no results due to problems that can be identified
2682 # by cheap follow-up queries, yielding helpful diagnostics.
2683 for query, snippets in [
2684 (
2685 # No records for one of the involved dimensions.
2686 registry.queryDataIds(["subfilter"]),
2687 ["no rows", "subfilter"],
2688 ),
2689 (
2690 # No records for one of the involved dimensions.
2691 registry.queryDimensionRecords("subfilter"),
2692 ["no rows", "subfilter"],
2693 ),
2694 ]:
2695 self.assertFalse(query.any(execute=True, exact=False))
2696 self.assertFalse(query.any(execute=True, exact=True))
2697 self.assertEqual(query.count(exact=True), 0)
2698 messages = list(query.explain_no_results())
2699 self.assertTrue(messages)
2700 # Want all expected snippets to appear in at least one message.
2701 self.assertTrue(
2702 any(
2703 all(snippet in message for snippet in snippets) for message in query.explain_no_results()
2704 ),
2705 messages,
2706 )
2708 # This query yields four overlaps in the database, but one is filtered
2709 # out in postprocessing. The count queries aren't accurate because
2710 # they don't account for duplication that happens due to an internal
2711 # join against commonSkyPix.
2712 query3 = registry.queryDataIds(["visit", "tract"], instrument="Cam1", skymap="SkyMap1")
2713 self.assertEqual(
2714 {
2715 DataCoordinate.standardize(
2716 instrument="Cam1",
2717 skymap="SkyMap1",
2718 visit=v,
2719 tract=t,
2720 universe=registry.dimensions,
2721 )
2722 for v, t in [(1, 0), (2, 0), (2, 1)]
2723 },
2724 set(query3),
2725 )
2726 self.assertTrue(query3.any(execute=False, exact=False))
2727 self.assertTrue(query3.any(execute=True, exact=False))
2728 self.assertTrue(query3.any(execute=True, exact=True))
2729 self.assertGreaterEqual(query3.count(exact=False), 4)
2730 self.assertGreaterEqual(query3.count(exact=True, discard=True), 3)
2731 self.assertFalse(list(query3.explain_no_results()))
2732 # This query yields overlaps in the database, but all are filtered
2733 # out in postprocessing. The count queries again aren't very useful.
2734 # We have to use `where=` here to avoid an optimization that
2735 # (currently) skips the spatial postprocess-filtering because it
2736 # recognizes that no spatial join is necessary. That's not ideal, but
2737 # fixing it is out of scope for this ticket.
2738 query4 = registry.queryDataIds(
2739 ["visit", "tract"],
2740 instrument="Cam1",
2741 skymap="SkyMap1",
2742 where="visit=1 AND detector=1 AND tract=0 AND patch=4",
2743 )
2744 self.assertFalse(set(query4))
2745 self.assertTrue(query4.any(execute=False, exact=False))
2746 self.assertTrue(query4.any(execute=True, exact=False))
2747 self.assertFalse(query4.any(execute=True, exact=True))
2748 self.assertGreaterEqual(query4.count(exact=False), 1)
2749 self.assertEqual(query4.count(exact=True, discard=True), 0)
2750 messages = query4.explain_no_results()
2751 self.assertTrue(messages)
2752 self.assertTrue(any("overlap" in message for message in messages))
2753 # This query should yield results from one dataset type but not the
2754 # other, which is not registered.
2755 query5 = registry.queryDatasets(["bias", "nonexistent"], collections=["biases"])
2756 self.assertTrue(set(query5))
2757 self.assertTrue(query5.any(execute=False, exact=False))
2758 self.assertTrue(query5.any(execute=True, exact=False))
2759 self.assertTrue(query5.any(execute=True, exact=True))
2760 self.assertGreaterEqual(query5.count(exact=False), 1)
2761 self.assertGreaterEqual(query5.count(exact=True), 1)
2762 self.assertFalse(list(query5.explain_no_results()))
2763 # This query applies a selection that yields no results, fully in the
2764 # database. Explaining why it fails involves traversing the relation
2765 # tree and running a LIMIT 1 query at each level that has the potential
2766 # to remove rows.
2767 query6 = registry.queryDimensionRecords(
2768 "detector", where="detector.purpose = 'no-purpose'", instrument="Cam1"
2769 )
2770 self.assertEqual(query6.count(exact=True), 0)
2771 messages = query6.explain_no_results()
2772 self.assertTrue(messages)
2773 self.assertTrue(any("no-purpose" in message for message in messages))
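    # The assertion pattern repeated in the loops above, factored into a
    # hedged helper sketch: a doomed query must report emptiness through
    # any() and count(), and explain itself via explain_no_results(). The
    # helper name is hypothetical; it assumes only methods used above.
    def _assertDoomed(self, query, snippets):
        self.assertFalse(query.any(execute=True, exact=False))
        self.assertFalse(query.any(execute=True, exact=True))
        self.assertEqual(query.count(exact=True), 0)
        messages = list(query.explain_no_results())
        self.assertTrue(messages)
        # All expected snippets must appear together in at least one message.
        self.assertTrue(
            any(all(snippet in message for snippet in snippets) for message in messages), messages
        )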
2775 def testQueryDataIdsOrderBy(self):
2776 """Test order_by and limit on result returned by queryDataIds()."""
2777 registry = self.makeRegistry()
2778 self.loadData(registry, "base.yaml")
2779 self.loadData(registry, "datasets.yaml")
2780 self.loadData(registry, "spatial.yaml")
2782 def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
2783 return registry.queryDataIds(
2784 dimensions, datasets=datasets, collections=collections, instrument="Cam1", skymap="SkyMap1"
2785 )
2787 Test = namedtuple(
2788 "testQueryDataIdsOrderByTest",
2789 ("order_by", "keys", "result", "limit", "datasets", "collections"),
2790 defaults=(None, None, None),
2791 )
2793 test_data = (
2794 Test("tract,visit", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2795 Test("-tract,visit", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))),
2796 Test("tract,-visit", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))),
2797 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2), (0, 2), (0, 1), (0, 1))),
2798 Test(
2799 "tract.id,visit.id",
2800 "tract,visit",
2801 ((0, 1), (0, 1), (0, 2)),
2802 limit=(3,),
2803 ),
2804 Test("-tract,-visit", "tract,visit", ((1, 2), (1, 2), (0, 2)), limit=(3,)),
2805 Test("tract,visit", "tract,visit", ((0, 2), (1, 2), (1, 2)), limit=(3, 3)),
2806 Test("-tract,-visit", "tract,visit", ((0, 1),), limit=(3, 5)),
2807 Test(
2808 "tract,visit.exposure_time", "tract,visit", ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2))
2809 ),
2810 Test(
2811 "-tract,-visit.exposure_time", "tract,visit", ((1, 2), (1, 2), (0, 1), (0, 1), (0, 2), (0, 2))
2812 ),
2813 Test("tract,-exposure_time", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2814 Test("tract,visit.name", "tract,visit", ((0, 1), (0, 1), (0, 2), (0, 2), (1, 2), (1, 2))),
2815 Test(
2816 "tract,-timespan.begin,timespan.end",
2817 "tract,visit",
2818 ((0, 2), (0, 2), (0, 1), (0, 1), (1, 2), (1, 2)),
2819 ),
2820 Test("visit.day_obs,exposure.day_obs", "visit,exposure", ()),
2821 Test("visit.timespan.begin,-exposure.timespan.begin", "visit,exposure", ()),
2822 Test(
2823 "tract,detector",
2824 "tract,detector",
2825 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2826 datasets="flat",
2827 collections="imported_r",
2828 ),
2829 Test(
2830 "tract,detector.full_name",
2831 "tract,detector",
2832 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2833 datasets="flat",
2834 collections="imported_r",
2835 ),
2836 Test(
2837 "tract,detector.raft,detector.name_in_raft",
2838 "tract,detector",
2839 ((0, 1), (0, 2), (0, 3), (0, 4), (1, 1), (1, 2), (1, 3), (1, 4)),
2840 datasets="flat",
2841 collections="imported_r",
2842 ),
2843 )
2845 for test in test_data:
2846 order_by = test.order_by.split(",")
2847 keys = test.keys.split(",")
2848 query = do_query(keys, test.datasets, test.collections).order_by(*order_by)
2849 if test.limit is not None:
2850 query = query.limit(*test.limit)
2851 dataIds = tuple(tuple(dataId[k] for k in keys) for dataId in query)
2852 self.assertEqual(dataIds, test.result)
2854 # and materialize
2855 query = do_query(keys).order_by(*order_by)
2856 if test.limit is not None:
2857 query = query.limit(*test.limit)
2858 with self.assertRaises(RelationalAlgebraError):
2859 with query.materialize():
2860 pass
2862 # errors in a name
2863 for order_by in ("", "-"):
2864 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
2865 list(do_query().order_by(order_by))
2867 for order_by in ("undimension.name", "-undimension.name"):
2868 with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
2869 list(do_query().order_by(order_by))
2871 for order_by in ("attract", "-attract"):
2872 with self.assertRaisesRegex(ValueError, "Metadata 'attract' cannot be found in any dimension"):
2873 list(do_query().order_by(order_by))
2875 with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
2876 list(do_query(("exposure", "visit")).order_by("exposure_time"))
2878 with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
2879 list(do_query(("exposure", "visit")).order_by("timespan.begin"))
2881 with self.assertRaisesRegex(
2882 ValueError, "Cannot find any temporal dimension element for 'timespan.begin'"
2883 ):
2884 list(do_query("tract").order_by("timespan.begin"))
2886 with self.assertRaisesRegex(ValueError, "Cannot use 'timespan.begin' with non-temporal element"):
2887 list(do_query("tract").order_by("tract.timespan.begin"))
2889 with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
2890 list(do_query("tract").order_by("tract.name"))
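    # A minimal usage sketch of the chaining interface exercised above: a "-"
    # prefix reverses a sort term, dotted names qualify a dimension element
    # or field, and limit() truncates the sorted rows. Assumes only the calls
    # already used in this test; the helper name is hypothetical.
    def _sketchOrderByLimit(self, registry):
        query = registry.queryDataIds(
            ["visit", "tract"], instrument="Cam1", skymap="SkyMap1"
        )
        return list(query.order_by("tract", "-visit.timespan.begin").limit(3))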
2892 def testQueryDataIdsGovernorExceptions(self):
2893 """Test exceptions raised by queryDataIds() for incorrect governors."""
2894 registry = self.makeRegistry()
2895 self.loadData(registry, "base.yaml")
2896 self.loadData(registry, "datasets.yaml")
2897 self.loadData(registry, "spatial.yaml")
2899 def do_query(dimensions, dataId=None, where="", bind=None, **kwargs):
2900 return registry.queryDataIds(dimensions, dataId=dataId, where=where, bind=bind, **kwargs)
2902 Test = namedtuple(
2903 "testQueryDataIdExceptionsTest",
2904 ("dimensions", "dataId", "where", "bind", "kwargs", "exception", "count"),
2905 defaults=(None, None, None, {}, None, 0),
2906 )
2908 test_data = (
2909 Test("tract,visit", count=6),
2910 Test("tract,visit", kwargs={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2911 Test(
2912 "tract,visit", kwargs={"instrument": "Cam2", "skymap": "SkyMap1"}, exception=DataIdValueError
2913 ),
2914 Test("tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap1"}, count=6),
2915 Test(
2916 "tract,visit", dataId={"instrument": "Cam1", "skymap": "SkyMap2"}, exception=DataIdValueError
2917 ),
2918 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap1'", count=6),
2919 Test("tract,visit", where="instrument='Cam1' AND skymap='SkyMap5'", exception=DataIdValueError),
2920 Test(
2921 "tract,visit",
2922 where="instrument=cam AND skymap=map",
2923 bind={"cam": "Cam1", "map": "SkyMap1"},
2924 count=6,
2925 ),
2926 Test(
2927 "tract,visit",
2928 where="instrument=cam AND skymap=map",
2929 bind={"cam": "Cam", "map": "SkyMap"},
2930 exception=DataIdValueError,
2931 ),
2932 )
2934 for test in test_data:
2935 dimensions = test.dimensions.split(",")
2936 if test.exception:
2937 with self.assertRaises(test.exception):
2938 do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs).count()
2939 else:
2940 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2941 self.assertEqual(query.count(discard=True), test.count)
2943 # and materialize
2944 if test.exception:
2945 with self.assertRaises(test.exception):
2946 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2947 with query.materialize() as materialized:
2948 materialized.count(discard=True)
2949 else:
2950 query = do_query(dimensions, test.dataId, test.where, bind=test.bind, **test.kwargs)
2951 with query.materialize() as materialized:
2952 self.assertEqual(materialized.count(discard=True), test.count)
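    # A hedged sketch of the failure mode exercised above: an unknown
    # governor value raises DataIdValueError rather than producing an empty
    # result, so callers can distinguish a bad data ID from a merely empty
    # query. The helper name is hypothetical.
    def _sketchGovernorCheck(self, registry):
        try:
            registry.queryDataIds(
                ["tract", "visit"], instrument="Cam2", skymap="SkyMap1"
            ).count()
        except DataIdValueError:
            return False  # "Cam2" is not a known instrument in this test data
        return True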
2954 def testQueryDimensionRecordsOrderBy(self):
2955 """Test order_by and limit on result returned by
2956 queryDimensionRecords().
2957 """
2958 registry = self.makeRegistry()
2959 self.loadData(registry, "base.yaml")
2960 self.loadData(registry, "datasets.yaml")
2961 self.loadData(registry, "spatial.yaml")
2963 def do_query(element, datasets=None, collections=None):
2964 return registry.queryDimensionRecords(
2965 element, instrument="Cam1", datasets=datasets, collections=collections
2966 )
2968 query = do_query("detector")
2969 self.assertEqual(len(list(query)), 4)
2971 Test = namedtuple(
2972 "testQueryDataIdsOrderByTest",
2973 ("element", "order_by", "result", "limit", "datasets", "collections"),
2974 defaults=(None, None, None),
2975 )
2977 test_data = (
2978 Test("detector", "detector", (1, 2, 3, 4)),
2979 Test("detector", "-detector", (4, 3, 2, 1)),
2980 Test("detector", "raft,-name_in_raft", (2, 1, 4, 3)),
2981 Test("detector", "-detector.purpose", (4,), limit=(1,)),
2982 Test("detector", "-purpose,detector.raft,name_in_raft", (2, 3), limit=(2, 2)),
2983 Test("visit", "visit", (1, 2)),
2984 Test("visit", "-visit.id", (2, 1)),
2985 Test("visit", "zenith_angle", (1, 2)),
2986 Test("visit", "-visit.name", (2, 1)),
2987 Test("visit", "day_obs,-timespan.begin", (2, 1)),
2988 )
2990 for test in test_data:
2991 order_by = test.order_by.split(",")
2992 query = do_query(test.element).order_by(*order_by)
2993 if test.limit is not None:
2994 query = query.limit(*test.limit)
2995 dataIds = tuple(rec.id for rec in query)
2996 self.assertEqual(dataIds, test.result)
2998 # errors in a name
2999 for order_by in ("", "-"):
3000 with self.assertRaisesRegex(ValueError, "Empty dimension name in ORDER BY"):
3001 list(do_query("detector").order_by(order_by))
3003 for order_by in ("undimension.name", "-undimension.name"):
3004 with self.assertRaisesRegex(ValueError, "Element name mismatch: 'undimension'"):
3005 list(do_query("detector").order_by(order_by))
3007 for order_by in ("attract", "-attract"):
3008 with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
3009 list(do_query("detector").order_by(order_by))
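    # A small sketch of the record-level ordering shown above: for a single
    # element, bare field names ("raft") and qualified ones ("detector.raft")
    # are interchangeable. Assumes only queryDimensionRecords/order_by as
    # used in this test; the helper name is hypothetical.
    def _sketchRecordOrdering(self, registry):
        query = registry.queryDimensionRecords("detector", instrument="Cam1")
        return [record.full_name for record in query.order_by("raft", "-name_in_raft")]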
3011 def testQueryDimensionRecordsExceptions(self):
3012 """Test exceptions raised by queryDimensionRecords()."""
3013 registry = self.makeRegistry()
3014 self.loadData(registry, "base.yaml")
3015 self.loadData(registry, "datasets.yaml")
3016 self.loadData(registry, "spatial.yaml")
3018 result = registry.queryDimensionRecords("detector")
3019 self.assertEqual(result.count(), 4)
3020 result = registry.queryDimensionRecords("detector", instrument="Cam1")
3021 self.assertEqual(result.count(), 4)
3022 result = registry.queryDimensionRecords("detector", dataId={"instrument": "Cam1"})
3023 self.assertEqual(result.count(), 4)
3024 result = registry.queryDimensionRecords("detector", where="instrument='Cam1'")
3025 self.assertEqual(result.count(), 4)
3026 result = registry.queryDimensionRecords("detector", where="instrument=instr", bind={"instr": "Cam1"})
3027 self.assertEqual(result.count(), 4)
3029 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3030 result = registry.queryDimensionRecords("detector", instrument="NotCam1")
3031 result.count()
3033 with self.assertRaisesRegex(DataIdValueError, "dimension instrument"):
3034 result = registry.queryDimensionRecords("detector", dataId={"instrument": "NotCam1"})
3035 result.count()
3037 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3038 result = registry.queryDimensionRecords("detector", where="instrument='NotCam1'")
3039 result.count()
3041 with self.assertRaisesRegex(DataIdValueError, "Unknown values specified for governor dimension"):
3042 result = registry.queryDimensionRecords(
3043 "detector", where="instrument=instr", bind={"instr": "NotCam1"}
3044 )
3045 result.count()
3047 def testDatasetConstrainedDimensionRecordQueries(self):
3048 """Test that queryDimensionRecords works even when given a dataset
3049 constraint whose dimensions extend beyond the requested dimension
3050 element's.
3051 """
3052 registry = self.makeRegistry()
3053 self.loadData(registry, "base.yaml")
3054 self.loadData(registry, "datasets.yaml")
3055 # Query for physical_filter dimension records, using a dataset that
3056 # has both physical_filter and detector dimensions.
3057 records = registry.queryDimensionRecords(
3058 "physical_filter",
3059 datasets=["flat"],
3060 collections="imported_r",
3061 )
3062 self.assertEqual({record.name for record in records}, {"Cam1-R1", "Cam1-R2"})
3063 # Trying to constrain by all dataset types is an error.
3064 with self.assertRaises(TypeError):
3065 list(registry.queryDimensionRecords("physical_filter", datasets=..., collections="imported_r"))
3067 def testSkyPixDatasetQueries(self):
3068 """Test that we can build queries involving skypix dimensions as long
3069 as a dataset type that uses those dimensions is included.
3070 """
3071 registry = self.makeRegistry()
3072 self.loadData(registry, "base.yaml")
3073 dataset_type = DatasetType(
3074 "a", dimensions=["htm7", "instrument"], universe=registry.dimensions, storageClass="int"
3075 )
3076 registry.registerDatasetType(dataset_type)
3077 run = "r"
3078 registry.registerRun(run)
3079 # First try queries where there are no datasets; the concern is whether
3080 # we can even build and execute these queries without raising, even
3081 # when "doomed" query shortcuts are in play.
3082 self.assertFalse(
3083 list(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run))
3084 )
3085 self.assertFalse(list(registry.queryDatasets(dataset_type, collections=run)))
3086 # Now add a dataset and see that we can get it back.
3087 htm7 = registry.dimensions.skypix["htm"][7].pixelization
3088 data_id = registry.expandDataId(instrument="Cam1", htm7=htm7.universe()[0][0])
3089 (ref,) = registry.insertDatasets(dataset_type, [data_id], run=run)
3090 self.assertEqual(
3091 set(registry.queryDataIds(["htm7", "instrument"], datasets=dataset_type, collections=run)),
3092 {data_id},
3093 )
3094 self.assertEqual(set(registry.queryDatasets(dataset_type, collections=run)), {ref})
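    # A hedged sketch of where the htm7 value above comes from: the skypix
    # system exposes an lsst.sphgeom pixelization whose universe() range set
    # yields (begin, end) index ranges, and any begin is a usable pixel id.
    # Assumes base.yaml is loaded (for Cam1); the helper name is hypothetical.
    def _sketchSkyPixIndex(self, registry):
        pixelization = registry.dimensions.skypix["htm"][7].pixelization
        begin, end = pixelization.universe()[0]
        # Expanding attaches the pixel's region to the returned data ID.
        return registry.expandDataId(instrument="Cam1", htm7=begin)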
3096 def testDatasetIdFactory(self):
3097 """Simple test for DatasetIdFactory, mostly to catch potential changes
3098 in its API.
3099 """
3100 registry = self.makeRegistry()
3101 factory = registry.datasetIdFactory
3102 dataset_type = DatasetType(
3103 "datasetType",
3104 dimensions=["detector", "instrument"],
3105 universe=registry.dimensions,
3106 storageClass="int",
3107 )
3108 run = "run"
3109 data_id = DataCoordinate.standardize(instrument="Cam1", detector=1, graph=dataset_type.dimensions)
3111 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.UNIQUE)
3112 self.assertIsInstance(datasetId, uuid.UUID)
3113 self.assertEqual(datasetId.version, 4)
3115 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE)
3116 self.assertIsInstance(datasetId, uuid.UUID)
3117 self.assertEqual(datasetId.version, 5)
3119 datasetId = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
3120 self.assertIsInstance(datasetId, uuid.UUID)
3121 self.assertEqual(datasetId.version, 5)
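    # A hedged sketch of the distinction verified above: UNIQUE ids are
    # random (UUID4), while the DATAID_TYPE* modes are deterministic (UUID5),
    # so repeating the same inputs reproduces the same id. Assumes only the
    # makeDatasetId call used in this test; the helper name is hypothetical.
    def _sketchDeterministicIds(self, factory, run, dataset_type, data_id):
        first = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        second = factory.makeDatasetId(run, dataset_type, data_id, DatasetIdGenEnum.DATAID_TYPE_RUN)
        assert first == second  # deterministic, unlike DatasetIdGenEnum.UNIQUE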
3123 def testExposureQueries(self):
3124 """Test query methods using arguments sourced from the exposure log
3125 service.
3127 The most complete test dataset currently available to daf_butler tests
3128 is the hsc-rc2-subset.yaml export (which is unfortunately distinct
3129 from the lsst/rc2_subset GitHub repo), but that does not have 'exposure'
3130 dimension records as it was focused on providing nontrivial spatial
3131 overlaps between visit+detector and tract+patch. So in this test we
3132 need to translate queries that originally used the exposure dimension
3133 to use the (very similar) visit dimension instead.
3134 """
3135 registry = self.makeRegistry()
3136 self.loadData(registry, "hsc-rc2-subset.yaml")
3137 self.assertEqual(
3138 [
3139 record.id
3140 for record in registry.queryDimensionRecords("visit", instrument="HSC")
3141 .order_by("id")
3142 .limit(5)
3143 ],
3144 [318, 322, 326, 330, 332],
3145 )
3146 self.assertEqual(
3147 [
3148 data_id["visit"]
3149 for data_id in registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
3150 ],
3151 [318, 322, 326, 330, 332],
3152 )
3153 self.assertEqual(
3154 [
3155 record.id
3156 for record in registry.queryDimensionRecords("detector", instrument="HSC")
3157 .order_by("full_name")
3158 .limit(5)
3159 ],
3160 [73, 72, 71, 70, 65],
3161 )
3162 self.assertEqual(
3163 [
3164 data_id["detector"]
3165 for data_id in registry.queryDataIds(["detector"], instrument="HSC")
3166 .order_by("full_name")
3167 .limit(5)
3168 ],
3169 [73, 72, 71, 70, 65],
3170 )
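    # A closing usage sketch of the paging pattern implied above: the second
    # argument to limit() is an offset, so consecutive pages of a sorted
    # query can be fetched with limit(n) and limit(n, n). Assumes the
    # hsc-rc2-subset.yaml data loaded above; the helper name is hypothetical.
    def _sketchPaging(self, registry):
        first = registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5)
        second = registry.queryDataIds(["visit"], instrument="HSC").order_by("id").limit(5, 5)
        return [d["visit"] for d in first], [d["visit"] for d in second]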