Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 6%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union, TYPE_CHECKING

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig

from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
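
    # A minimal sketch (hypothetical subclass, assuming a
    # ``Registry.createFromConfig`` factory) of how a concrete test case
    # might provide the abstract pieces above:
    #
    #     class SqlRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             return Registry.createFromConfig(self.makeRegistryConfig())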

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
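
    # Typical usage of the helpers above within a test method, mirroring the
    # pattern used throughout this class:
    #
    #     registry = self.makeRegistry()
    #     self.loadData(registry, "base.yaml")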

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be included
        # when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove; check
        # that this does not affect our ability to query for dataset types
        # (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._sqlRegistry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._sqlRegistry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. It should be found directly in
        # run2, and hence via chain2 as well, since run2 is at the front of
        # that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
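
    # For reference, the chain layout built (and then torn down) in the test
    # above, and the search order it implies:
    #
    #     chain2 -> [run2, chain1]
    #     chain1 -> [tag1, run2]
    #
    # so findDataset(..., collections=chain2) searches run2, then tag1, then
    # run2 again, returning the first match.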

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
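
    # The savepoint pattern exercised above, as application code might use it
    # (a sketch, assuming an existing ``registry``):
    #
    #     with registry.transaction():
    #         registry.insertDimensionData(...)  # kept if the block exits cleanly
    #         try:
    #             with registry.transaction(savepoint=True):
    #                 ...                        # rolled back alone on error
    #         except sqlalchemy.exc.IntegrityError:
    #             pass                           # outer transaction still commits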

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )

        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the dimensions, but it is
        # part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # expression excludes everything; specifying a non-existing skymap is
        # not a fatal error, just operator error
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
        # the TopologicalFamily they belong to. We'll relate all elements in
        # each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do anything
        # useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, regions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in regions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands. This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # number of attributes with schema versions in a fresh database,
        # 6 managers with 3 records per manager, plus config for dimensions
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)
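
        # For reference (a sketch, not one of the assertions above): values
        # stored this way read back directly, with an optional default for
        # missing keys:
        #
        #     attributes.get("version.core")        # -> "1.2.3"
        #     attributes.get("no-such-key", "n/a")  # -> "n/a"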

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here. That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
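        # With no dimensions there is only one possible data ID (the empty
        # one), so each run can hold at most one "schema" dataset, and a
        # find-first search should be resolved purely by collection order.
        # The two refs must nevertheless differ, since they live in
        # different runs:
        self.assertNotEqual(dataset1, dataset2)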
        # Query directly for both of the datasets, and each one, one at a time.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2]
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Set up: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        t4 = astropy.time.Time("2020-01-01T04:00:00", format="isot", scale="tai")
        t5 = astropy.time.Time("2020-01-01T05:00:00", format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
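        # Note: ``None`` appears at both ends of that list so that
        # combinations() yields open-ended lower bounds (None, t), open-ended
        # upper bounds (t, None), and the fully unbounded (None, None)
        # timespan, in addition to all finite [ti, tj) pairs.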
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
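        # To summarize, the calibration collection now contains:
        #   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
        #   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)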

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the given
            expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and do the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just the detector-2 bias over [t2, t4).
        # This should split a single certification row into two (and leave
        # the other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)
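        # For example, Timespan(t1, t3) overlaps the surviving (None, t2)
        # piece but not the (t4, None) piece, so it should find bias2a;
        # Timespan(t1, t5) overlaps both pieces and is ambiguous; and
        # Timespan(t2, t4) overlaps neither, so it should find nothing.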

    def testIngestTimeQuery(self):
        """Test queries against the dataset ingest_date field."""
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)

        # A cutoff in the past should match every dataset ingested so far.
        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # no one will ever use this piece of software in 30 years
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids) * 0.1)
        i2 = int(len(ids) * 0.3)
        i3 = int(len(ids) * 0.6)
        i4 = int(len(ids) * 0.8)
        # Extract some times from those: just before the beginning of i1
        # (which should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its
        # end), and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))

        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }
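        # The ``bind`` mapping lets the WHERE expressions below refer to
        # these values by name (t1 ... t4, ts23) instead of interpolating
        # literals into the query strings.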

        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"]
                 for dataId in registry.queryDataIds("visit", instrument="HSC",
                                                     bind=bind, where=where)}
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan
        # into the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4 (this
        # also exercises an explicit time literal in the expression).
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
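        # (t3, t2) has its begin after its end, i.e. it is an empty timespan,
        # so it should match no visits even though both bounds lie inside the
        # observed range.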

    def testCollectionSummaries(self):
        """Test recording and retrieval of collection summaries.
        """
        self.maxDiff = None
        registry = self.makeRegistry()
        # Importing datasets from YAML should go through the code path where
        # we update collection summaries as we insert datasets.
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        flat = registry.getDatasetType("flat")
        expected1 = CollectionSummary.makeEmpty(registry.dimensions)
        expected1.datasetTypes.add(registry.getDatasetType("bias"))
        expected1.datasetTypes.add(flat)
        expected1.dimensions.update_extract(
            DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
        )
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        # Create a chained collection with both of the imported runs; the
        # summary should be the same, because it's a union with itself.
        chain = "chain"
        registry.registerCollection(chain, CollectionType.CHAINED)
        registry.setCollectionChain(chain, ["imported_r", "imported_g"])
        self.assertEqual(registry.getCollectionSummary(chain), expected1)
        # Associate only the flats into a tagged collection and a calibration
        # collection, to check the summaries of those.
        tag = "tag"
        registry.registerCollection(tag, CollectionType.TAGGED)
        registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
        calibs = "calibs"
        registry.registerCollection(calibs, CollectionType.CALIBRATION)
        registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
                         timespan=Timespan(None, None))
        expected2 = expected1.copy()
        expected2.datasetTypes.discard("bias")
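        # Only flats went into the tagged and calibration collections, so
        # their summaries should not include bias; note that discard() here
        # accepts the dataset type's name rather than the type itself.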
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)
        # Explicitly calling Registry.refresh() should load those same
        # summaries, via a totally different code path.
        registry.refresh()
        self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
        self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
        self.assertEqual(registry.getCollectionSummary(tag), expected2)
        self.assertEqual(registry.getCollectionSummary(calibs), expected2)

    def testUnrelatedDimensionQueries(self):
        """Test that WHERE expressions in queries can reference dimensions that
        are not in the result set.
        """
        registry = self.makeRegistry()
        # There is no data to back this query, but it should still return
        # zero records instead of raising.
        self.assertFalse(
            set(registry.queryDataIds(["visit", "detector"],
                                      where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
        )