# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, timedelta
import itertools
import logging
import os
import re
from typing import Iterator, Optional, Type, Union, TYPE_CHECKING
import unittest

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

import lsst.sphgeom
from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig
from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class; if a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config
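
    # A subclass that wants to exercise a non-default manager sets the class
    # members above; a sketch (the manager path is an illustrative
    # assumption, not something this module defines):
    #
    #     class NameKeyCollectionsRegistryTests(SqliteRegistryTests):
    #         collectionsManager = (
    #             "lsst.daf.butler.registry.collections.nameKey"
    #             ".NameKeyCollectionManager"
    #         )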

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
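
    # The files consumed by ``loadData`` are standard butler YAML
    # import/export files.  A rough sketch of what one looks like (the exact
    # contents are illustrative, not a file shipped with this module):
    #
    #     description: Butler Data Repository Export
    #     version: 0
    #     data:
    #     - type: dimension
    #       element: instrument
    #       records:
    #       - name: Cam1
    #         detector_max: 4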

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)
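
    # The ``assertIsInstance`` check above verifies that ``expandDataId``
    # normalizes integer-like values (such as ``np.int64``) to plain ``int``
    # in the expanded data ID.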

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
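
    # ``insertDatasets`` returns one resolved `DatasetRef` per data ID, which
    # is why single-dataset results above and below are unpacked with the
    # one-element-tuple idiom:
    #
    #     ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)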

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
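
    # Component dataset type names follow the ``parent.component`` convention
    # used throughout these tests; for example:
    #
    #     DatasetType.nameWithComponent("flat", "image")  # -> "flat.image"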

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. It should also be found via
        # chain2, which searches run2 first.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])
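
    # As the assertions above demonstrate, ``flatten=True`` makes
    # ``setCollectionChain`` recursively replace CHAINED children with their
    # own children before storing the chain, while ``flatten=False`` stores
    # the chain exactly as given.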

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
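
    # The ``savepoint=True`` argument above is what makes the nested rollback
    # possible: the inner ``registry.transaction`` block rolls back only to
    # its savepoint, so the outer transaction (and the dataId1 insertion)
    # survives the caught IntegrityError.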

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the requested
        # dimensions but is part of the full expression, should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing
        # skymap is not a fatal error, just an operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)
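
    # The spatial-join test above verifies the registry's overlap join
    # against a brute-force pairwise comparison built from ``lsst.sphgeom``
    # region predicates (``isDisjointFrom``) and the commonSkyPix
    # pixelization's ``envelope`` ranges, so any discrepancy points at the
    # registry's join logic rather than the test data.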

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for a non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)
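
    # As exercised above, ``attributes.set`` refuses to overwrite an existing
    # key, raising ``ButlerAttributeExistsError``, unless ``force=True`` is
    # passed explicitly.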

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1290 # Materialize the original query, but none of the follow-up queries.
1291 with dataIds.materialize() as dataIds:
1292 self.assertEqual(dataIds.graph, expectedGraph)
1293 self.assertEqual(dataIds.toSet(), expectedDataIds)
1294 self.assertCountEqual(
1295 list(
1296 dataIds.findDatasets(
1297 flat,
1298 collections=["imported_r"],
1299 )
1300 ),
1301 expectedFlats,
1302 )
1303 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1304 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1305 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1306 self.assertCountEqual(
1307 list(
1308 subsetDataIds.findDatasets(
1309 bias,
1310 collections=["imported_r", "imported_g"],
1311 findFirst=False
1312 )
1313 ),
1314 expectedAllBiases
1315 )
1316 self.assertCountEqual(
1317 list(
1318 subsetDataIds.findDatasets(
1319 bias,
1320 collections=["imported_r", "imported_g"],
1321 findFirst=True
1322 )
1323 ), expectedDeduplicatedBiases
1324 )
1325 # Materialize just the bias dataset queries.
1326 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1327 findFirst=False).materialize() as biases:
1328 self.assertCountEqual(list(biases), expectedAllBiases)
1329 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1330 findFirst=True).materialize() as biases:
1331 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1332 # Materialize the subset data ID query, but not the dataset
1333 # queries.
1334 with subsetDataIds.materialize() as subsetDataIds:
1335 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1336 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1337 self.assertCountEqual(
1338 list(
1339 subsetDataIds.findDatasets(
1340 bias,
1341 collections=["imported_r", "imported_g"],
1342 findFirst=False
1343 )
1344 ),
1345 expectedAllBiases
1346 )
1347 self.assertCountEqual(
1348 list(
1349 subsetDataIds.findDatasets(
1350 bias,
1351 collections=["imported_r", "imported_g"],
1352 findFirst=True
1353 )
1354 ), expectedDeduplicatedBiases
1355 )
1356 # Materialize the bias dataset queries, too, so now we're
1357 # materializing every single step.
1358 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1359 findFirst=False).materialize() as biases:
1360 self.assertCountEqual(list(biases), expectedAllBiases)
1361 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1362 findFirst=True).materialize() as biases:
1363 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1365 def testEmptyDimensionsQueries(self):
1366 """Test Query and QueryResults objects in the case where there are no
1367 dimensions.
1368 """
1369 # Set up test data: one dataset type, two runs, one dataset in each.
1370 registry = self.makeRegistry()
1371 self.loadData(registry, "base.yaml")
1372 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1373 registry.registerDatasetType(schema)
1374 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1375 run1 = "run1"
1376 run2 = "run2"
1377 registry.registerRun(run1)
1378 registry.registerRun(run2)
1379 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1380 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
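# Both datasets share the single empty data ID, so with findFirst=True the
# collection search order alone determines which dataset is returned; the
# assertions below exercise both orderings.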
1381 # Query directly for both of the datasets, then for each one individually.
1382 self.assertCountEqual(
1383 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
1384 [dataset1, dataset2]
1385 )
1386 self.assertEqual(
1387 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
1388 [dataset1],
1389 )
1390 self.assertEqual(
1391 list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
1392 [dataset2],
1393 )
1394 # Query for data IDs with no dimensions.
1395 dataIds = registry.queryDataIds([])
1396 self.assertEqual(
1397 dataIds.toSequence(),
1398 DataCoordinateSequence([dataId], registry.dimensions.empty)
1399 )
1400 # Use queried data IDs to find the datasets.
1401 self.assertCountEqual(
1402 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1403 [dataset1, dataset2],
1404 )
1405 self.assertEqual(
1406 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1407 [dataset1],
1408 )
1409 self.assertEqual(
1410 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1411 [dataset2],
1412 )
1413 # Now materialize the data ID query results and repeat those tests.
1414 with dataIds.materialize() as dataIds:
1415 self.assertEqual(
1416 dataIds.toSequence(),
1417 DataCoordinateSequence([dataId], registry.dimensions.empty)
1418 )
1419 self.assertCountEqual(
1420 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1421 [dataset1, dataset2],
1422 )
1423 self.assertEqual(
1424 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1425 [dataset1],
1426 )
1427 self.assertEqual(
1428 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1429 [dataset2],
1430 )
1431 # Query for non-empty data IDs, then subset that to get the empty one.
1432 # Repeat the above tests starting from that.
1433 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1434 self.assertEqual(
1435 dataIds.toSequence(),
1436 DataCoordinateSequence([dataId], registry.dimensions.empty)
1437 )
1438 self.assertCountEqual(
1439 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1440 [dataset1, dataset2],
1441 )
1442 self.assertEqual(
1443 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1444 [dataset1],
1445 )
1446 self.assertEqual(
1447 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1448 [dataset2],
1449 )
1450 with dataIds.materialize() as dataIds:
1451 self.assertEqual(
1452 dataIds.toSequence(),
1453 DataCoordinateSequence([dataId], registry.dimensions.empty)
1454 )
1455 self.assertCountEqual(
1456 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1457 [dataset1, dataset2],
1458 )
1459 self.assertEqual(
1460 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1461 [dataset1],
1462 )
1463 self.assertEqual(
1464 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1465 [dataset2],
1466 )
1467 # Query for non-empty data IDs, then materialize, then subset to get
1468 # the empty one. Repeat again.
1469 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1470 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1471 self.assertEqual(
1472 dataIds.toSequence(),
1473 DataCoordinateSequence([dataId], registry.dimensions.empty)
1474 )
1475 self.assertCountEqual(
1476 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1477 [dataset1, dataset2],
1478 )
1479 self.assertEqual(
1480 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1481 [dataset1],
1482 )
1483 self.assertEqual(
1484 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1485 [dataset2],
1486 )
1487 with dataIds.materialize() as dataIds:
1488 self.assertEqual(
1489 dataIds.toSequence(),
1490 DataCoordinateSequence([dataId], registry.dimensions.empty)
1491 )
1492 self.assertCountEqual(
1493 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1494 [dataset1, dataset2],
1495 )
1496 self.assertEqual(
1497 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1498 [dataset1],
1499 )
1500 self.assertEqual(
1501 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1502 [dataset2],
1503 )
1505 def testDimensionDataModifications(self):
1506 """Test that modifying dimension records via:
1507 syncDimensionData(..., update=True) and
1508 insertDimensionData(..., replace=True) works as expected, even in the
1509 presence of datasets using those dimensions and spatial overlap
1510 relationships.
1511 """
1513 def unpack_range_set(ranges: lsst.sphgeom.RangeSet) -> Iterator[int]:
1514 """Unpack a sphgeom.RangeSet into the integers it contains.
1515 """
1516 for begin, end in ranges:
1517 yield from range(begin, end)
1519 def range_set_hull(
1520 ranges: lsst.sphgeom.RangeSet,
1521 pixelization: lsst.sphgeom.HtmPixelization,
1522 ) -> lsst.sphgeom.ConvexPolygon:
1523 """Create a ConvexPolygon hull of the region defined by a set of
1524 HTM pixelization index ranges.
1525 """
1526 points = []
1527 for index in unpack_range_set(ranges):
1528 points.extend(pixelization.triangle(index).getVertices())
1529 return lsst.sphgeom.ConvexPolygon(points)
1531 # Use HTM to set up an initial parent region (one arbitrary trixel)
1532 # and four child regions (the trixels within the parent at the next
1533 # level). We'll use the parent as a tract/visit region and the children
1534 # as its patch/visit_detector regions.
1535 registry = self.makeRegistry()
1536 htm6 = registry.dimensions.skypix["htm"][6].pixelization
1537 commonSkyPix = registry.dimensions.commonSkyPix.pixelization
1538 index = 12288
1539 child_ranges_small = lsst.sphgeom.RangeSet(index).scaled(4)
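# (HTM assigns the four children of trixel i the indices 4*i .. 4*i + 3 at
# the next level, so scaling the index range by 4 yields exactly the range
# covering the child trixels: here 12288 scales to 49152..49155.)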
1540 assert htm6.universe().contains(child_ranges_small)
1541 child_regions_small = [htm6.triangle(i) for i in unpack_range_set(child_ranges_small)]
1542 parent_region_small = lsst.sphgeom.ConvexPolygon(
1543 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_small))
1544 )
1545 assert all(parent_region_small.contains(c) for c in child_regions_small)
1546 # Make a larger version of each child region, defined to be the set of
1547 # htm6 trixels that overlap the original's bounding circle. Make a new
1548 # parent that's the convex hull of the new children.
1549 child_regions_large = [
1550 range_set_hull(htm6.envelope(c.getBoundingCircle()), htm6)
1551 for c in child_regions_small
1552 ]
1553 assert all(large.contains(small) for large, small in zip(child_regions_large, child_regions_small))
1554 parent_region_large = lsst.sphgeom.ConvexPolygon(
1555 list(itertools.chain.from_iterable(c.getVertices() for c in child_regions_large))
1556 )
1557 assert all(parent_region_large.contains(c) for c in child_regions_large)
1558 assert parent_region_large.contains(parent_region_small)
1559 assert not parent_region_small.contains(parent_region_large)
1560 assert not all(parent_region_small.contains(c) for c in child_regions_large)
1561 # Find some commonSkyPix indices that overlap the large regions but do
1562 # not overlap the small regions. We use commonSkyPix here to make sure the
1563 # real tests later involve what's in the database, not just post-query
1564 # region filtering.
1565 child_difference_indices = []
1566 for large, small in zip(child_regions_large, child_regions_small):
1567 difference = list(unpack_range_set(commonSkyPix.envelope(large) - commonSkyPix.envelope(small)))
1568 assert difference, "if this is empty, we can't test anything useful with these regions"
1569 assert all(
1570 not commonSkyPix.triangle(d).isDisjointFrom(large)
1571 and commonSkyPix.triangle(d).isDisjointFrom(small)
1572 for d in difference
1573 )
1574 child_difference_indices.append(difference)
1575 parent_difference_indices = list(
1576 unpack_range_set(
1577 commonSkyPix.envelope(parent_region_large) - commonSkyPix.envelope(parent_region_small)
1578 )
1579 )
1580 assert parent_difference_indices, "if this is empty, we can't test anything useful with these regions"
1581 assert all(
1582 (
1583 not commonSkyPix.triangle(d).isDisjointFrom(parent_region_large)
1584 and commonSkyPix.triangle(d).isDisjointFrom(parent_region_small)
1585 )
1586 for d in parent_difference_indices
1587 )
1588 # Now that we've finally got those regions, we'll insert the large ones
1589 # as tract/patch dimension records.
1590 skymap_name = "testing_v1"
1591 registry.insertDimensionData(
1592 "skymap", {
1593 "name": skymap_name,
1594 "hash": bytes([42]),
1595 "tract_max": 1,
1596 "patch_nx_max": 2,
1597 "patch_ny_max": 2,
1598 }
1599 )
1600 registry.insertDimensionData(
1601 "tract",
1602 {"skymap": skymap_name, "id": 0, "region": parent_region_large}
1603 )
1604 registry.insertDimensionData(
1605 "patch",
1606 *[{
1607 "skymap": skymap_name,
1608 "tract": 0,
1609 "id": n,
1610 "cell_x": n % 2,
1611 "cell_y": n // 2,
1612 "region": c
1613 } for n, c in enumerate(child_regions_large)]
1614 )
1615 # Add a dataset that uses these dimensions to make sure that modifying
1616 # them doesn't disrupt foreign keys (we need to make sure the DB doesn't
1617 # implement insert with replace=True as delete-then-insert).
1618 dataset_type = DatasetType(
1619 "coadd",
1620 dimensions=["tract", "patch"],
1621 universe=registry.dimensions,
1622 storageClass="Exposure",
1623 )
1624 registry.registerDatasetType(dataset_type)
1625 registry.registerCollection("the_run", CollectionType.RUN)
1626 registry.insertDatasets(
1627 dataset_type,
1628 [{"skymap": skymap_name, "tract": 0, "patch": 2}],
1629 run="the_run",
1630 )
1631 # Query for tracts and patches that overlap some "difference"
1632 # commonSkyPix pixels; there should be overlaps, because the database has
1633 # the "large" suite of regions.
1634 self.assertEqual(
1635 {0},
1636 {
1637 data_id["tract"] for data_id in registry.queryDataIds(
1638 ["tract"],
1639 skymap=skymap_name,
1640 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1641 )
1642 }
1643 )
1644 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1645 self.assertIn(
1646 patch_id,
1647 {
1648 data_id["patch"] for data_id in registry.queryDataIds(
1649 ["patch"],
1650 skymap=skymap_name,
1651 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1652 )
1653 }
1654 )
1655 # Use sync to update the tract region and insert to update the patch
1656 # regions, to the "small" suite.
1657 updated = registry.syncDimensionData(
1658 "tract",
1659 {"skymap": skymap_name, "id": 0, "region": parent_region_small},
1660 update=True,
1661 )
1662 self.assertEqual(updated, {"region": parent_region_large})
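# As the assertion above verifies, syncDimensionData(..., update=True)
# returns a mapping from each updated field to the value it replaced.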
1663 registry.insertDimensionData(
1664 "patch",
1665 *[{
1666 "skymap": skymap_name,
1667 "tract": 0,
1668 "id": n,
1669 "cell_x": n % 2,
1670 "cell_y": n // 2,
1671 "region": c
1672 } for n, c in enumerate(child_regions_small)],
1673 replace=True
1674 )
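# With replace=True, existing patch rows are updated in place rather than
# deleted and re-inserted, so the coadd dataset's foreign key to patch 2
# stays valid.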
1675 # Query again; there should now be no such overlaps, because the
1676 # database has the "small" suite of regions.
1677 self.assertFalse(
1678 set(
1679 registry.queryDataIds(
1680 ["tract"],
1681 skymap=skymap_name,
1682 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1683 )
1684 )
1685 )
1686 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1687 self.assertNotIn(
1688 patch_id,
1689 {
1690 data_id["patch"] for data_id in registry.queryDataIds(
1691 ["patch"],
1692 skymap=skymap_name,
1693 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1694 )
1695 }
1696 )
1697 # Update back to the large regions and query one more time.
1698 updated = registry.syncDimensionData(
1699 "tract",
1700 {"skymap": skymap_name, "id": 0, "region": parent_region_large},
1701 update=True,
1702 )
1703 self.assertEqual(updated, {"region": parent_region_small})
1704 registry.insertDimensionData(
1705 "patch",
1706 *[{
1707 "skymap": skymap_name,
1708 "tract": 0,
1709 "id": n,
1710 "cell_x": n % 2,
1711 "cell_y": n // 2,
1712 "region": c
1713 } for n, c in enumerate(child_regions_large)],
1714 replace=True
1715 )
1716 self.assertEqual(
1717 {0},
1718 {
1719 data_id["tract"] for data_id in registry.queryDataIds(
1720 ["tract"],
1721 skymap=skymap_name,
1722 dataId={registry.dimensions.commonSkyPix.name: parent_difference_indices[0]},
1723 )
1724 }
1725 )
1726 for patch_id, patch_difference_indices in enumerate(child_difference_indices):
1727 self.assertIn(
1728 patch_id,
1729 {
1730 data_id["patch"] for data_id in registry.queryDataIds(
1731 ["patch"],
1732 skymap=skymap_name,
1733 dataId={registry.dimensions.commonSkyPix.name: patch_difference_indices[0]},
1734 )
1735 }
1736 )
1738 def testCalibrationCollections(self):
1739 """Test operations on `~CollectionType.CALIBRATION` collections,
1740 including `Registry.certify`, `Registry.decertify`, and
1741 `Registry.findDataset`.
1742 """
1743 # Set up: make a Registry and fill it with some datasets in
1744 # non-calibration collections.
1745 registry = self.makeRegistry()
1746 self.loadData(registry, "base.yaml")
1747 self.loadData(registry, "datasets.yaml")
1748 # Set up some timestamps.
1749 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1750 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1751 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1752 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1753 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1754 allTimespans = [
1755 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1756 ]
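# (The duplicate None in the combinations input yields (None, t_i), (t_i,
# None), and (None, None) pairs, so allTimespans also covers half-unbounded
# and fully unbounded timespans.)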
1757 # Get references to some datasets.
1758 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1759 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1760 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1761 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1762 # Register the main calibration collection we'll be working with.
1763 collection = "Cam1/calibs/default"
1764 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1765 # Cannot associate into a calibration collection (no timespan).
1766 with self.assertRaises(TypeError):
1767 registry.associate(collection, [bias2a])
1768 # Certify the 2a dataset with [t2, t4) validity.
1769 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1770 # We should not be able to certify 2b with anything overlapping that
1771 # window.
1772 with self.assertRaises(ConflictingDefinitionError):
1773 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1774 with self.assertRaises(ConflictingDefinitionError):
1775 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1776 with self.assertRaises(ConflictingDefinitionError):
1777 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1778 with self.assertRaises(ConflictingDefinitionError):
1779 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1780 with self.assertRaises(ConflictingDefinitionError):
1781 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1782 with self.assertRaises(ConflictingDefinitionError):
1783 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1784 with self.assertRaises(ConflictingDefinitionError):
1785 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1786 with self.assertRaises(ConflictingDefinitionError):
1787 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1788 # We should be able to certify 3a with a range overlapping that window,
1789 # because it's for a different detector.
1790 # We'll certify 3a over [t1, t3).
1791 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1792 # Now we'll certify 2b and 3b together over [t4, ∞).
1793 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
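# The validity ranges in the collection are now:
# detector 2: bias2a over [t2, t4), bias2b over [t4, ∞);
# detector 3: bias3a over [t1, t3), bias3b over [t4, ∞).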
1795 # Fetch all associations and check that they are what we expect.
1796 self.assertCountEqual(
1797 list(
1798 registry.queryDatasetAssociations(
1799 "bias",
1800 collections=[collection, "imported_g", "imported_r"],
1801 )
1802 ),
1803 [
1804 DatasetAssociation(
1805 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1806 collection="imported_g",
1807 timespan=None,
1808 ),
1809 DatasetAssociation(
1810 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1811 collection="imported_r",
1812 timespan=None,
1813 ),
1814 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1815 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1816 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1817 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1818 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1819 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1820 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1821 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1822 ]
1823 )
1825 class Ambiguous:
1826 """Tag class to denote lookups that are expected to be ambiguous.
1827 """
1828 pass
1830 def assertLookup(detector: int, timespan: Timespan,
1831 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1832 """Local function that asserts that a bias lookup returns the given
1833 expected result.
1834 """
1835 if expected is Ambiguous:
1836 with self.assertRaises(RuntimeError):
1837 registry.findDataset("bias", collections=collection, instrument="Cam1",
1838 detector=detector, timespan=timespan)
1839 else:
1840 self.assertEqual(
1841 expected,
1842 registry.findDataset("bias", collections=collection, instrument="Cam1",
1843 detector=detector, timespan=timespan)
1844 )
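# findDataset with a timespan returns the unique dataset whose validity range
# overlaps it, returns None when nothing overlaps, and raises RuntimeError
# when the lookup is ambiguous (more than one certification overlaps).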
1846 # Systematically test lookups against expected results.
1847 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1848 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1849 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1850 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1851 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1852 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1853 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1854 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1855 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1856 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1857 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1858 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1859 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1860 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1861 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1862 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1863 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1864 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1865 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1866 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1867 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1868 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1869 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1870 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1871 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1872 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1873 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1874 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1875 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1876 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1877 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
1878 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1879 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1880 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1881 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
1882 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1883 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1884 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
1885 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1886 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
1887 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1888 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1890 # Decertify [t3, t5) for all data IDs, and do test lookups again.
1891 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
1892 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
1893 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
1894 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1895 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1896 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1897 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1898 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
1899 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1900 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1901 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1902 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1903 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
1904 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1905 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1906 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1907 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
1908 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1909 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
1910 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
1911 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
1912 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
1913 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1914 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1915 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1916 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1917 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1918 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1919 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
1920 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1921 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1922 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1923 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1924 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
1925 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1926 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1927 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1928 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
1929 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1930 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1931 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
1932 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1933 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
1934 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1935 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1937 # Decertify everything, this time with explicit data IDs, then check
1938 # that no lookups succeed.
1939 registry.decertify(
1940 collection, "bias", Timespan(None, None),
1941 dataIds=[
1942 dict(instrument="Cam1", detector=2),
1943 dict(instrument="Cam1", detector=3),
1944 ]
1945 )
1946 for detector in (2, 3):
1947 for timespan in allTimespans:
1948 assertLookup(detector=detector, timespan=timespan, expected=None)
1949 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
1950 # those.
1951 registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
1952 for timespan in allTimespans:
1953 assertLookup(detector=2, timespan=timespan, expected=bias2a)
1954 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1955 # Decertify just bias2a over [t2, t4).
1956 # This should split a single certification row into two (and leave the
1957 # other existing row, for bias3a, alone).
1958 registry.decertify(collection, "bias", Timespan(t2, t4),
1959 dataIds=[dict(instrument="Cam1", detector=2)])
1960 for timespan in allTimespans:
1961 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1962 overlapsBefore = timespan.overlaps(Timespan(None, t2))
1963 overlapsAfter = timespan.overlaps(Timespan(t4, None))
1964 if overlapsBefore and overlapsAfter:
1965 expected = Ambiguous
1966 elif overlapsBefore or overlapsAfter:
1967 expected = bias2a
1968 else:
1969 expected = None
1970 assertLookup(detector=2, timespan=timespan, expected=expected)
1972 def testIngestTimeQuery(self):
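"""Test that `where` expressions involving the dataset ingest_date
field work as expected.
"""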
1974 registry = self.makeRegistry()
1975 self.loadData(registry, "base.yaml")
1976 dt0 = datetime.utcnow()
1977 self.loadData(registry, "datasets.yaml")
1978 dt1 = datetime.utcnow()
1980 datasets = list(registry.queryDatasets(..., collections=...))
1981 len0 = len(datasets)
1982 self.assertGreater(len0, 0)
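# The T'...' syntax is a time literal in the query expression language; all
# datasets were ingested after 2000, so this cut keeps everything.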
1984 where = "ingest_date > T'2000-01-01'"
1985 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1986 len1 = len(datasets)
1987 self.assertEqual(len0, len1)
1989 # No one will ever use this piece of software in 30 years.
1990 where = "ingest_date > T'2050-01-01'"
1991 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1992 len2 = len(datasets)
1993 self.assertEqual(len2, 0)
1995 # Check more exact timing to make sure there is no 37-second offset
1996 # (the TAI-UTC difference; see DM-30124). SQLite time precision is 1 second,
1997 # so make sure that we don't test with higher precision.
1998 tests = [
1999 # format: (timestamp, operator, expected_len)
2000 (dt0 - timedelta(seconds=1), ">", len0),
2001 (dt0 - timedelta(seconds=1), "<", 0),
2002 (dt1 + timedelta(seconds=1), "<", len0),
2003 (dt1 + timedelta(seconds=1), ">", 0),
2004 ]
2005 for dt, op, expect_len in tests:
2006 dt_str = dt.isoformat(sep=" ")
2008 where = f"ingest_date {op} T'{dt_str}'"
2009 datasets = list(registry.queryDatasets(..., collections=..., where=where))
2010 self.assertEqual(len(datasets), expect_len)
2012 # Same tests, but with bind parameters using datetime or astropy Time.
2013 where = f"ingest_date {op} ingest_time"
2014 datasets = list(registry.queryDatasets(..., collections=..., where=where,
2015 bind={"ingest_time": dt}))
2016 self.assertEqual(len(datasets), expect_len)
2018 dt_astropy = astropy.time.Time(dt, format="datetime")
2019 datasets = list(registry.queryDatasets(..., collections=..., where=where,
2020 bind={"ingest_time": dt_astropy}))
2021 self.assertEqual(len(datasets), expect_len)
2023 def testTimespanQueries(self):
2024 """Test query expressions involving timespans.
2025 """
2026 registry = self.makeRegistry()
2027 self.loadData(registry, "hsc-rc2-subset.yaml")
2028 # All visits in the database; mapping from ID to timespan.
2029 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
2030 # Just those IDs, sorted (which is also temporal sorting, because HSC
2031 # visit IDs are monotonically increasing in time).
2032 ids = sorted(visits.keys())
2033 self.assertGreater(len(ids), 20)
2034 # Pick some quasi-random indexes into `ids` to play with.
2035 i1 = int(len(ids)*0.1)
2036 i2 = int(len(ids)*0.3)
2037 i3 = int(len(ids)*0.6)
2038 i4 = int(len(ids)*0.8)
2039 # Extract some times from those: just before the beginning of i1 (which
2040 # should be after the end of the previous visit), exactly the
2041 # beginning of i2, just after the beginning of i3 (and before its end),
2042 # and the exact end of i4.
2043 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
2044 self.assertGreater(t1, visits[ids[i1 - 1]].end)
2045 t2 = visits[ids[i2]].begin
2046 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
2047 self.assertLess(t3, visits[ids[i3]].end)
2048 t4 = visits[ids[i4]].end
2049 # Make sure those are actually in order.
2050 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
2052 bind = {
2053 "t1": t1,
2054 "t2": t2,
2055 "t3": t3,
2056 "t4": t4,
2057 "ts23": Timespan(t2, t3),
2058 }
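# The bind mapping lets the `where` expressions below refer to these times
# and the Timespan by name instead of embedding literal values.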
2060 def query(where):
2061 """Helper function that queries for visit data IDs and returns
2062 results as a sorted, deduplicated list of visit IDs.
2063 """
2064 return sorted(
2065 {dataId["visit"] for dataId in registry.queryDataIds("visit",
2066 instrument="HSC",
2067 bind=bind,
2068 where=where)}
2069 )
2071 # Try a bunch of timespan queries, mixing up the bounds themselves,
2072 # where they appear in the expression, and how we get the timespan into
2073 # the expression.
2075 # t1 is before the start of i1, so this should not include i1.
2076 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
2077 # t2 is exactly at the start of i2, but ends are exclusive, so these
2078 # should not include i2.
2079 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
2080 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
2081 # t3 is in the middle of i3, so this should include i3.
2082 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
2083 # This one should not include i3, by the same reasoning.
2084 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
2085 # t4 is exactly at the end of i4, so this should include i4.
2086 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
2087 # i4's upper bound of t4 is exclusive, so this should not include i4.
2088 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
2090 # Now some timespan vs. time scalar queries.
2091 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
2092 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
2093 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
2094 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
2095 self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
2096 self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
2098 # Empty timespans should not overlap anything.
2099 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
2101 def testCollectionSummaries(self):
2102 """Test recording and retrieval of collection summaries.
2103 """
2104 self.maxDiff = None
2105 registry = self.makeRegistry()
2106 # Importing datasets from yaml should go through the code path where
2107 # we update collection summaries as we insert datasets.
2108 self.loadData(registry, "base.yaml")
2109 self.loadData(registry, "datasets.yaml")
2110 flat = registry.getDatasetType("flat")
2111 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
2112 expected1.datasetTypes.add(registry.getDatasetType("bias"))
2113 expected1.datasetTypes.add(flat)
2114 expected1.dimensions.update_extract(
2115 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
2116 )
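# A collection summary records the dataset types present and the governor
# dimension values (here just instrument='Cam1') used by their data IDs.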
2117 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2118 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2119 # Create a chained collection with both of the imported runs; the
2120 # summary should be the same, because it's a union with itself.
2121 chain = "chain"
2122 registry.registerCollection(chain, CollectionType.CHAINED)
2123 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
2124 self.assertEqual(registry.getCollectionSummary(chain), expected1)
2125 # Associate only the flats into a tagged collection and a calibration
2126 # collection to check summaries of those.
2127 tag = "tag"
2128 registry.registerCollection(tag, CollectionType.TAGGED)
2129 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
2130 calibs = "calibs"
2131 registry.registerCollection(calibs, CollectionType.CALIBRATION)
2132 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
2133 timespan=Timespan(None, None))
2134 expected2 = expected1.copy()
2135 expected2.datasetTypes.discard("bias")
2136 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2137 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2138 # Explicitly calling Registry.refresh() should load those same
2139 # summaries, via a totally different code path.
2140 registry.refresh()
2141 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
2142 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
2143 self.assertEqual(registry.getCollectionSummary(tag), expected2)
2144 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
2146 def testUnrelatedDimensionQueries(self):
2147 """Test that WHERE expressions in queries can reference dimensions that
2148 are not in the result set.
2149 """
2150 registry = self.makeRegistry()
2151 # There is no data to back this query, but it should still return
2152 # zero records instead of raising.
2153 self.assertFalse(
2154 set(registry.queryDataIds(["visit", "detector"],
2155 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
2156 )
2158 def testBindInQueryDatasets(self):
2159 """Test that the bind parameter is correctly forwarded in
2160 queryDatasets recursion.
2161 """
2162 registry = self.makeRegistry()
2163 # Import some datasets to query against.
2165 self.loadData(registry, "base.yaml")
2166 self.loadData(registry, "datasets.yaml")
2167 self.assertEqual(
2168 set(registry.queryDatasets("flat", band="r", collections=...)),
2169 set(registry.queryDatasets("flat", where="band=my_band", bind={"my_band": "r"}, collections=...)),
2170 )