# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union, TYPE_CHECKING

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from ..summaries import CollectionSummary
from .._collectionType import CollectionType
from .._config import RegistryConfig

from .._exceptions import (
    ConflictingDefinitionError,
    InconsistentDataIdError,
    MissingCollectionError,
    OrphanedRecordError,
)
from ..interfaces import ButlerAttributeExistsError

if TYPE_CHECKING:
    from .._registry import Registry


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    datasetsManager: Optional[str] = None
    """Name of the datasets manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need only the default configuration should just
        instantiate `RegistryConfig` directly.
        """
        config = RegistryConfig()
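        # A tuple key addresses a nested entry in the configuration
        # hierarchy, here entries under the "managers" section.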
        if self.collectionsManager:
            config["managers", "collections"] = self.collectionsManager
        if self.datasetsManager:
            config["managers", "datasets"] = self.datasetsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
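        # Insert rows as plain dicts; "count" is nullable, so None is allowed.
        # fetchOpaqueData with keyword arguments filters on those columns
        # (ANDed together); with no arguments it returns every row.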
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
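        # Visit 1 was just defined to contain only exposure 1, so a data ID
        # combining visit 1 with exposure 2 is internally inconsistent.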
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
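        # insertDatasets returns one resolved DatasetRef (with an id) per
        # data ID; the trailing comma unpacks the single-element list.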
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
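        # assertLess on sets tests for a proper subset: the query result must
        # contain at least these names (bias and flat have more components
        # than the two listed here).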
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registries", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
535 tag1 = "tag1"
536 registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
537 self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
538 registry.associate(tag1, [ref1, ref2])
539 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
540 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 in chain2 should find it via tag1
        # (recursing through chain1), because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  Searching chain2 should find
        # the same dataset, whether via run2 at the front of the chain or via
        # chain1 at the end.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
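        # With flatten=False the chain stores "inner" itself; with
        # flatten=True "inner" is expanded to its children at assignment
        # time, so "outer" ends up pointing at "innermost" directly.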
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
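        # A transaction block that exits normally is committed; one that an
        # exception escapes from is rolled back in full.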
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 (rolled back) and Cam3 (never inserted) should both not
        # exist.
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
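
        # The queries below use the union of the raw and calexp dimensions,
        # i.e. {instrument, exposure, visit, detector}.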
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
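            # Dimension packers encode a data ID into a single integer and
            # back; pack/unpack should round-trip, and the two packers should
            # produce different integers for the same data ID.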
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter.  It is not in the requested dimensions,
        # but it is part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # We need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)
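
        # The union of all three dataset types' required dimensions is
        # {skymap, tract, patch, band}.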
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing
        # skymap is not a fatal error, it's an operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # number of attributes with schema versions in a fresh database,
        # 6 managers with 3 records per manager, plus config for dimensions
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
1120 registry = self.makeRegistry()
1121 self.loadData(registry, "base.yaml")
1122 self.loadData(registry, "datasets.yaml")
1123 self.assertCountEqual(
1124 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1125 [
1126 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1127 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1128 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1129 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1130 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1131 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1132 ]
1133 )
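        # With findFirst=True, each data ID yields only the dataset from the
        # first collection in the search order that contains one, so swapping
        # the collection order below changes which refs are returned.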
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
1155 def testQueryResults(self):
1156 """Test querying for data IDs and then manipulating the QueryResults
1157 object returned to perform other queries.
1158 """
1159 registry = self.makeRegistry()
1160 self.loadData(registry, "base.yaml")
1161 self.loadData(registry, "datasets.yaml")
1162 bias = registry.getDatasetType("bias")
1163 flat = registry.getDatasetType("flat")
1164 # Obtain expected results from methods other than those we're testing
1165 # here. That includes:
1166 # - the dimensions of the data IDs we want to query:
1167 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1168 # - the dimensions of some other data IDs we'll extract from that:
1169 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1170 # - the data IDs we expect to obtain from the first queries:
1171 expectedDataIds = DataCoordinateSet(
1172 {
1173 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
1174 universe=registry.dimensions)
1175 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1176 },
1177 graph=expectedGraph,
1178 hasFull=False,
1179 hasRecords=False,
1180 )
1181 # - the flat datasets we expect to find from those data IDs, in just
1182 # one collection (so deduplication is irrelevant):
1183 expectedFlats = [
1184 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
1185 collections="imported_r"),
1186 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
1187 collections="imported_r"),
1188 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
1189 collections="imported_r"),
1190 ]
1191 # - the data IDs we expect to extract from that:
1192 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1193 # - the bias datasets we expect to find from those data IDs, after we
1194 # subset-out the physical_filter dimension, both with duplicates:
1195 expectedAllBiases = [
1196 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1197 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1198 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1199 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1200 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1201 ]
1202 # - ...and without duplicates:
1203 expectedDeduplicatedBiases = [
1204 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1205 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1206 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1207 ]
1208 # Test against those expected results, using a "lazy" query for the
1209 # data IDs (which re-executes that query each time we use it to do
1210 # something new).
1211 dataIds = registry.queryDataIds(
1212 ["detector", "physical_filter"],
1213 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1214 instrument="Cam1",
1215 )
1216 self.assertEqual(dataIds.graph, expectedGraph)
1217 self.assertEqual(dataIds.toSet(), expectedDataIds)
1218 self.assertCountEqual(
1219 list(
1220 dataIds.findDatasets(
1221 flat,
1222 collections=["imported_r"],
1223 )
1224 ),
1225 expectedFlats,
1226 )
1227 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1228 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1229 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1230 self.assertCountEqual(
1231 list(
1232 subsetDataIds.findDatasets(
1233 bias,
1234 collections=["imported_r", "imported_g"],
1235 findFirst=False
1236 )
1237 ),
1238 expectedAllBiases
1239 )
1240 self.assertCountEqual(
1241 list(
1242 subsetDataIds.findDatasets(
1243 bias,
1244 collections=["imported_r", "imported_g"],
1245 findFirst=True
1246 )
1247 ), expectedDeduplicatedBiases
1248 )
1249 # Materialize the bias dataset queries (only) by putting the results
1250 # into temporary tables, then repeat those tests.
1251 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1252 findFirst=False).materialize() as biases:
1253 self.assertCountEqual(list(biases), expectedAllBiases)
1254 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1255 findFirst=True).materialize() as biases:
1256 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1257 # Materialize the data ID subset query, but not the dataset queries.
1258 with subsetDataIds.materialize() as subsetDataIds:
1259 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1260 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1261 self.assertCountEqual(
1262 list(
1263 subsetDataIds.findDatasets(
1264 bias,
1265 collections=["imported_r", "imported_g"],
1266 findFirst=False
1267 )
1268 ),
1269 expectedAllBiases
1270 )
1271 self.assertCountEqual(
1272 list(
1273 subsetDataIds.findDatasets(
1274 bias,
1275 collections=["imported_r", "imported_g"],
1276 findFirst=True
1277 )
1278 ), expectedDeduplicatedBiases
1279 )
1280 # Materialize the dataset queries, too.
1281 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1282 findFirst=False).materialize() as biases:
1283 self.assertCountEqual(list(biases), expectedAllBiases)
1284 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1285 findFirst=True).materialize() as biases:
1286 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1287 # Materialize the original query, but none of the follow-up queries.
1288 with dataIds.materialize() as dataIds:
1289 self.assertEqual(dataIds.graph, expectedGraph)
1290 self.assertEqual(dataIds.toSet(), expectedDataIds)
1291 self.assertCountEqual(
1292 list(
1293 dataIds.findDatasets(
1294 flat,
1295 collections=["imported_r"],
1296 )
1297 ),
1298 expectedFlats,
1299 )
1300 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1301 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1302 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1303 self.assertCountEqual(
1304 list(
1305 subsetDataIds.findDatasets(
1306 bias,
1307 collections=["imported_r", "imported_g"],
1308 findFirst=False
1309 )
1310 ),
1311 expectedAllBiases
1312 )
1313 self.assertCountEqual(
1314 list(
1315 subsetDataIds.findDatasets(
1316 bias,
1317 collections=["imported_r", "imported_g"],
1318 findFirst=True
1319 )
1320 ), expectedDeduplicatedBiases
1321 )
1322 # Materialize just the bias dataset queries.
1323 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1324 findFirst=False).materialize() as biases:
1325 self.assertCountEqual(list(biases), expectedAllBiases)
1326 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1327 findFirst=True).materialize() as biases:
1328 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1329 # Materialize the subset data ID query, but not the dataset
1330 # queries.
1331 with subsetDataIds.materialize() as subsetDataIds:
1332 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1333 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1334 self.assertCountEqual(
1335 list(
1336 subsetDataIds.findDatasets(
1337 bias,
1338 collections=["imported_r", "imported_g"],
1339 findFirst=False
1340 )
1341 ),
1342 expectedAllBiases
1343 )
1344 self.assertCountEqual(
1345 list(
1346 subsetDataIds.findDatasets(
1347 bias,
1348 collections=["imported_r", "imported_g"],
1349 findFirst=True
1350 )
1351 ), expectedDeduplicatedBiases
1352 )
1353 # Materialize the bias dataset queries, too, so now we're
1354 # materializing every single step.
1355 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1356 findFirst=False).materialize() as biases:
1357 self.assertCountEqual(list(biases), expectedAllBiases)
1358 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1359 findFirst=True).materialize() as biases:
1360 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
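# A minimal sketch of the lazy-vs-materialized distinction exercised
# above (illustrative only; `registry`, `bias`, and the collection names
# are the objects set up in the test above). A lazy result object
# re-executes its query each time it is used; materialize() snapshots
# the current rows into a temporary table that follow-up queries read:
#
#     dataIds = registry.queryDataIds(["detector", "physical_filter"], instrument="Cam1")
#     with dataIds.materialize() as frozen:
#         # Both follow-ups run against the temporary table rather than
#         # re-executing the dimension query.
#         allRefs = list(frozen.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=False))
#         bestRefs = list(frozen.findDatasets(bias, collections=["imported_r", "imported_g"], findFirst=True))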
1362 def testEmptyDimensionsQueries(self):
1363 """Test Query and QueryResults objects in the case where there are no
1364 dimensions.
1365 """
1366 # Set up test data: one dataset type, two runs, one dataset in each.
1367 registry = self.makeRegistry()
1368 self.loadData(registry, "base.yaml")
1369 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1370 registry.registerDatasetType(schema)
1371 dataId = DataCoordinate.makeEmpty(registry.dimensions)
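# makeEmpty returns the unique data ID for an empty dimension set, so a
# given RUN collection can hold at most one dataset of this type.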
1372 run1 = "run1"
1373 run2 = "run2"
1374 registry.registerRun(run1)
1375 registry.registerRun(run2)
1376 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1377 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1378 # Query directly for both of the datasets, then for each one individually.
1379 self.assertCountEqual(
1380 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
1381 [dataset1, dataset2]
1382 )
1383 self.assertEqual(
1384 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
1385 [dataset1],
1386 )
1387 self.assertEqual(
1388 list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
1389 [dataset2],
1390 )
1391 # Query for data IDs with no dimensions.
1392 dataIds = registry.queryDataIds([])
1393 self.assertEqual(
1394 dataIds.toSequence(),
1395 DataCoordinateSequence([dataId], registry.dimensions.empty)
1396 )
1397 # Use queried data IDs to find the datasets.
1398 self.assertCountEqual(
1399 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1400 [dataset1, dataset2],
1401 )
1402 self.assertEqual(
1403 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1404 [dataset1],
1405 )
1406 self.assertEqual(
1407 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1408 [dataset2],
1409 )
1410 # Now materialize the data ID query results and repeat those tests.
1411 with dataIds.materialize() as dataIds:
1412 self.assertEqual(
1413 dataIds.toSequence(),
1414 DataCoordinateSequence([dataId], registry.dimensions.empty)
1415 )
1416 self.assertCountEqual(
1417 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1418 [dataset1, dataset2],
1419 )
1420 self.assertEqual(
1421 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1422 [dataset1],
1423 )
1424 self.assertEqual(
1425 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1426 [dataset2],
1427 )
1428 # Query for non-empty data IDs, then subset that to get the empty one.
1429 # Repeat the above tests starting from that.
1430 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1431 self.assertEqual(
1432 dataIds.toSequence(),
1433 DataCoordinateSequence([dataId], registry.dimensions.empty)
1434 )
1435 self.assertCountEqual(
1436 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1437 [dataset1, dataset2],
1438 )
1439 self.assertEqual(
1440 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1441 [dataset1],
1442 )
1443 self.assertEqual(
1444 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1445 [dataset2],
1446 )
1447 with dataIds.materialize() as dataIds:
1448 self.assertEqual(
1449 dataIds.toSequence(),
1450 DataCoordinateSequence([dataId], registry.dimensions.empty)
1451 )
1452 self.assertCountEqual(
1453 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1454 [dataset1, dataset2],
1455 )
1456 self.assertEqual(
1457 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1458 [dataset1],
1459 )
1460 self.assertEqual(
1461 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1462 [dataset2],
1463 )
1464 # Query for non-empty data IDs, then materialize, then subset to get
1465 # the empty one. Then repeat the same tests once more.
1466 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1467 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1468 self.assertEqual(
1469 dataIds.toSequence(),
1470 DataCoordinateSequence([dataId], registry.dimensions.empty)
1471 )
1472 self.assertCountEqual(
1473 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1474 [dataset1, dataset2],
1475 )
1476 self.assertEqual(
1477 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1478 [dataset1],
1479 )
1480 self.assertEqual(
1481 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1482 [dataset2],
1483 )
1484 with dataIds.materialize() as dataIds:
1485 self.assertEqual(
1486 dataIds.toSequence(),
1487 DataCoordinateSequence([dataId], registry.dimensions.empty)
1488 )
1489 self.assertCountEqual(
1490 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1491 [dataset1, dataset2],
1492 )
1493 self.assertEqual(
1494 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1495 [dataset1],
1496 )
1497 self.assertEqual(
1498 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1499 [dataset2],
1500 )
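# In condensed form, the ordering behavior checked throughout this test
# (names as set up above): with no dimensions there is exactly one
# dataset per run, so findFirst resolution depends only on collection
# search order.
#
#     registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)  # yields dataset1
#     registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)  # yields dataset2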
1502 def testCalibrationCollections(self):
1503 """Test operations on `~CollectionType.CALIBRATION` collections,
1504 including `Registry.certify`, `Registry.decertify`, and
1505 `Registry.findDataset`.
1506 """
1507 # Set up: make a Registry and fill it with some datasets in
1508 # non-calibration collections.
1509 registry = self.makeRegistry()
1510 self.loadData(registry, "base.yaml")
1511 self.loadData(registry, "datasets.yaml")
1512 # Set up some timestamps.
1513 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1514 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1515 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1516 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1517 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1518 allTimespans = [
1519 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1520 ]
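# With the two None sentinels, the combinations above cover bounded,
# half-open, and fully unbounded spans, e.g. Timespan(None, t1),
# Timespan(t1, t2), Timespan(t5, None), and Timespan(None, None).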
1521 # Get references to some datasets.
1522 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1523 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1524 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1525 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1526 # Register the main calibration collection we'll be working with.
1527 collection = "Cam1/calibs/default"
1528 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1529 # Cannot associate into a calibration collection (no timespan).
1530 with self.assertRaises(TypeError):
1531 registry.associate(collection, [bias2a])
1532 # Certify 2a dataset with [t2, t4) validity.
1533 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1534 # We should not be able to certify 2b with anything overlapping that
1535 # window.
1536 with self.assertRaises(ConflictingDefinitionError):
1537 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1538 with self.assertRaises(ConflictingDefinitionError):
1539 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1540 with self.assertRaises(ConflictingDefinitionError):
1541 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1542 with self.assertRaises(ConflictingDefinitionError):
1543 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1544 with self.assertRaises(ConflictingDefinitionError):
1545 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1546 with self.assertRaises(ConflictingDefinitionError):
1547 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1548 with self.assertRaises(ConflictingDefinitionError):
1549 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1550 with self.assertRaises(ConflictingDefinitionError):
1551 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1552 # We should be able to certify 3a with a range overlapping that window,
1553 # because it's for a different detector.
1554 # We'll certify 3a over [t1, t3).
1555 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1556 # Now we'll certify 2b and 3b together over [t4, ∞).
1557 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1559 # Fetch all associations and check that they are what we expect.
1560 self.assertCountEqual(
1561 list(
1562 registry.queryDatasetAssociations(
1563 "bias",
1564 collections=[collection, "imported_g", "imported_r"],
1565 )
1566 ),
1567 [
1568 DatasetAssociation(
1569 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1570 collection="imported_g",
1571 timespan=None,
1572 ),
1573 DatasetAssociation(
1574 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1575 collection="imported_r",
1576 timespan=None,
1577 ),
1578 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1579 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1580 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1581 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1582 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1583 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1584 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1585 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1586 ]
1587 )
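# Note the timespan field above: associations in the RUN collections
# carry timespan=None; only the CALIBRATION collection attaches a
# validity range to each association.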
1589 class Ambiguous:
1590 """Tag class to denote lookups that are expected to be ambiguous.
1591 """
1594 def assertLookup(detector: int, timespan: Timespan,
1595 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1596 """Local function that asserts that a bias lookup returns the given
1597 expected result.
1598 """
1599 if expected is Ambiguous:
1600 with self.assertRaises(RuntimeError):
1601 registry.findDataset("bias", collections=collection, instrument="Cam1",
1602 detector=detector, timespan=timespan)
1603 else:
1604 self.assertEqual(
1605 expected,
1606 registry.findDataset("bias", collections=collection, instrument="Cam1",
1607 detector=detector, timespan=timespan)
1608 )
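# The semantics encoded by this helper: a calibration lookup with a
# timespan returns the one dataset whose validity range overlaps it,
# returns None when nothing overlaps, and raises (RuntimeError) when
# more than one certification overlaps, which the Ambiguous tag
# stands for.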
1610 # Systematically test lookups against expected results.
1611 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1612 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1613 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1614 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1615 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1616 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1617 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1618 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1619 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1620 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1621 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1622 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1623 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1624 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1625 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1626 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1627 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1628 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1629 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1630 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1631 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1632 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1633 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1634 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1635 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1636 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1637 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1638 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1639 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1640 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1641 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
1642 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1643 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1644 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1645 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
1646 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1647 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1648 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
1649 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1650 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
1651 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1652 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1654 # Decertify [t3, t5) for all data IDs, and do test lookups again.
1655 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
1656 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
1657 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
1658 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1659 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1660 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1661 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1662 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
1663 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1664 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1665 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1666 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1667 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
1668 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1669 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1670 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1671 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
1672 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1673 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
1674 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
1675 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
1676 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
1677 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1678 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1679 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1680 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1681 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1682 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1683 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
1684 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1685 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1686 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1687 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1688 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
1689 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1690 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1691 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1692 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
1693 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1694 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1695 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
1696 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1697 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
1698 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1699 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1701 # Decertify everything, this time with explicit data IDs, then check
1702 # that no lookups succeed.
1703 registry.decertify(
1704 collection, "bias", Timespan(None, None),
1705 dataIds=[
1706 dict(instrument="Cam1", detector=2),
1707 dict(instrument="Cam1", detector=3),
1708 ]
1709 )
1710 for detector in (2, 3):
1711 for timespan in allTimespans:
1712 assertLookup(detector=detector, timespan=timespan, expected=None)
1713 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
1714 # those.
1715 registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
1716 for timespan in allTimespans:
1717 assertLookup(detector=2, timespan=timespan, expected=bias2a)
1718 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1719 # Decertify just bias2a over [t2, t4).
1720 # This should split a single certification row into two (and leave the
1721 # other existing row, for bias3a, alone).
1722 registry.decertify(collection, "bias", Timespan(t2, t4),
1723 dataIds=[dict(instrument="Cam1", detector=2)])
1724 for timespan in allTimespans:
1725 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1726 overlapsBefore = timespan.overlaps(Timespan(None, t2))
1727 overlapsAfter = timespan.overlaps(Timespan(t4, None))
1728 if overlapsBefore and overlapsAfter:
1729 expected = Ambiguous
1730 elif overlapsBefore or overlapsAfter:
1731 expected = bias2a
1732 else:
1733 expected = None
1734 assertLookup(detector=2, timespan=timespan, expected=expected)
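# Condensed certify/decertify lifecycle, as exercised above (collection,
# refs, and times are the ones defined in this test; validity ranges are
# half-open):
#
#     registry.certify(collection, [bias2a], Timespan(t2, t4))     # valid over [t2, t4)
#     registry.decertify(collection, "bias", Timespan(t3, t5))     # truncates it to [t2, t3)
#     registry.findDataset("bias", collections=collection, instrument="Cam1",
#                          detector=2, timespan=Timespan(t2, t3))  # -> bias2a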
1736 def testIngestTimeQuery(self):
"""Test that `where` expressions can filter datasets on ingest_date.
"""
1738 registry = self.makeRegistry()
1739 self.loadData(registry, "base.yaml")
1740 self.loadData(registry, "datasets.yaml")
# Here `...` (Ellipsis) is the registry wildcard meaning "all dataset
# types" / "all collections".
1742 datasets = list(registry.queryDatasets(..., collections=...))
1743 len0 = len(datasets)
1744 self.assertGreater(len0, 0)
# Every dataset in this test was ingested well after 2000, so this
# cutoff should match all of them.
1746 where = "ingest_date > T'2000-01-01'"
1747 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1748 len1 = len(datasets)
1749 self.assertEqual(len0, len1)
1751 # No one will ever use this piece of software in 30 years, so nothing
# should have an ingest_date this far in the future.
1752 where = "ingest_date > T'2050-01-01'"
1753 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1754 len2 = len(datasets)
1755 self.assertEqual(len2, 0)
1757 def testTimespanQueries(self):
1758 """Test query expressions involving timespans.
1759 """
1760 registry = self.makeRegistry()
1761 self.loadData(registry, "hsc-rc2-subset.yaml")
1762 # All visits in the database; mapping from ID to timespan.
1763 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
1764 # Just those IDs, sorted (which is also temporal sorting, because HSC
1765 # visit IDs are monotonically increasing).
1766 ids = sorted(visits.keys())
1767 self.assertGreater(len(ids), 20)
1768 # Pick some quasi-random indexes into `ids` to play with.
1769 i1 = int(len(ids)*0.1)
1770 i2 = int(len(ids)*0.3)
1771 i3 = int(len(ids)*0.6)
1772 i4 = int(len(ids)*0.8)
1773 # Extract some times from those: just before the beginning of i1 (but
1774 # still after the end of the previous visit), exactly the
1775 # beginning of i2, just after the beginning of i3 (and before its end),
1776 # and the exact end of i4.
1777 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
1778 self.assertGreater(t1, visits[ids[i1 - 1]].end)
1779 t2 = visits[ids[i2]].begin
1780 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
1781 self.assertLess(t3, visits[ids[i3]].end)
1782 t4 = visits[ids[i4]].end
1783 # Make sure those are actually in order.
1784 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
1786 bind = {
1787 "t1": t1,
1788 "t2": t2,
1789 "t3": t3,
1790 "t4": t4,
1791 "ts23": Timespan(t2, t3),
1792 }
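# The bind mapping lets the `where` strings below refer to these Python
# values by name (t1 through t4, and ts23) instead of interpolating
# formatted time literals into the expression text.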
1794 def query(where):
1795 """Helper function that queries for visit data IDs and returns
1796 results as a sorted, deduplicated list of visit IDs.
1797 """
1798 return sorted(
1799 {dataId["visit"] for dataId in registry.queryDataIds("visit",
1800 instrument="HSC",
1801 bind=bind,
1802 where=where)}
1803 )
1805 # Try a bunch of timespan queries, mixing up the bounds themselves,
1806 # where they appear in the expression, and how we get the timespan into
1807 # the expression.
1809 # t1 is before the start of i1, so this should not include i1.
1810 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
1811 # t2 is exactly at the start of i2, but ends are exclusive, so these
1812 # should not include i2.
1813 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
1814 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
1815 # t3 is in the middle of i3, so this should include i3.
1816 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
1817 # This one should not include i3, by the same reasoning.
1818 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
1819 # t4 is exactly at the end of i4, so this should include i4.
1820 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
1821 # i4's upper bound of t4 is exclusive, so this should not include i4.
1822 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
1824 # Now some timespan vs. time scalar queries.
1825 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
1826 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
1827 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
1828 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
1829 self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
1830 self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
1832 # Empty timespans should not overlap anything.
1833 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
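# Timespans are half-open intervals [begin, end), which is why the
# inverted pair above forms an empty span. Written out with this test's
# bounds (illustrative):
#
#     Timespan(t2, t3).overlaps(Timespan(t1, t2))  # False: ends are exclusive
#     Timespan(t3, t2).overlaps(Timespan(t1, t4))  # False: empty span overlaps nothing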
1835 def testCollectionSummaries(self):
1836 """Test recording and retrieval of collection summaries.
1837 """
1838 self.maxDiff = None
1839 registry = self.makeRegistry()
1840 # Importing datasets from YAML should go through the code path where
1841 # we update collection summaries as we insert datasets.
1842 self.loadData(registry, "base.yaml")
1843 self.loadData(registry, "datasets.yaml")
1844 flat = registry.getDatasetType("flat")
1845 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
1846 expected1.datasetTypes.add(registry.getDatasetType("bias"))
1847 expected1.datasetTypes.add(flat)
1848 expected1.dimensions.update_extract(
1849 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
1850 )
1851 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1852 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1853 # Create a chained collection with both of the imported runs; the
1854 # summary should be the same, because it's a union with itself.
1855 chain = "chain"
1856 registry.registerCollection(chain, CollectionType.CHAINED)
1857 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
1858 self.assertEqual(registry.getCollectionSummary(chain), expected1)
1859 # Associate flats only into a tagged collection and a calibration
1860 # collection to check summaries of those.
1861 tag = "tag"
1862 registry.registerCollection(tag, CollectionType.TAGGED)
1863 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
1864 calibs = "calibs"
1865 registry.registerCollection(calibs, CollectionType.CALIBRATION)
1866 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
1867 timespan=Timespan(None, None))
1868 expected2 = expected1.copy()
1869 expected2.datasetTypes.discard("bias")
1870 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1871 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1872 # Explicitly calling Registry.refresh() should load those same
1873 # summaries, via a totally different code path.
1874 registry.refresh()
1875 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1876 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1877 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1878 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
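# Sketch of how these summaries are typically consulted (names from this
# test; the membership check is illustrative): a consumer can rule out a
# collection for a dataset type without querying its contents.
#
#     summary = registry.getCollectionSummary(tag)
#     if flat in summary.datasetTypes:
#         ...  # only now is it worth searching `tag` for flats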
1880 def testUnrelatedDimensionQueries(self):
1881 """Test that WHERE expressions in queries can reference dimensions that
1882 are not in the result set.
1883 """
1884 registry = self.makeRegistry()
1885 # There is no data to back this query, but it should still return
1886 # zero records instead of raising.
1887 self.assertFalse(
1888 set(registry.queryDataIds(["visit", "detector"],
1889 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
1890 )
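# The where clause above pulls skymap and tract into the query solely to
# constrain it; those dimensions never appear in the returned data IDs,
# which is exactly the behavior this test guards.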