Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 5%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTests"]
25from abc import ABC, abstractmethod
26from collections import defaultdict
27import itertools
28import logging
29import os
30import re
31import unittest
33import astropy.time
34import sqlalchemy
35from typing import Optional, Type, Union
37try:
38 import numpy as np
39except ImportError:
40 np = None
42from ...core import (
43 DataCoordinate,
44 DataCoordinateSequence,
45 DataCoordinateSet,
46 DatasetAssociation,
47 DatasetRef,
48 DatasetType,
49 DimensionGraph,
50 NamedValueSet,
51 StorageClass,
52 ddl,
53 Timespan,
54)
55from .._registry import (
56 CollectionSummary,
57 CollectionType,
58 ConflictingDefinitionError,
59 InconsistentDataIdError,
60 OrphanedRecordError,
61 Registry,
62 RegistryConfig,
63)
64from .._exceptions import MissingCollectionError
65from ..interfaces import ButlerAttributeExistsError
68class RegistryTests(ABC):
69 """Generic tests for the `Registry` class that can be subclassed to
70 generate tests for different configurations.
71 """
73 collectionsManager: Optional[str] = None
74 """Name of the collections manager class, if subclass provides value for
75 this member then it overrides name specified in default configuration
76 (`str`).
77 """
79 @classmethod
80 @abstractmethod
81 def getDataDir(cls) -> str:
82 """Return the root directory containing test data YAML files.
83 """
84 raise NotImplementedError()
86 def makeRegistryConfig(self) -> RegistryConfig:
87 """Create RegistryConfig used to create a registry.
89 This method should be called by a subclass from `makeRegistry`.
90 Returned instance will be pre-configured based on the values of class
91 members, and default-configured for all other parametrs. Subclasses
92 that need default configuration should just instantiate
93 `RegistryConfig` directly.
94 """
95 config = RegistryConfig()
96 if self.collectionsManager:
97 config["managers"]["collections"] = self.collectionsManager
98 return config
100 @abstractmethod
101 def makeRegistry(self) -> Registry:
102 """Return the Registry instance to be tested.
103 """
104 raise NotImplementedError()
106 def loadData(self, registry: Registry, filename: str):
107 """Load registry test data from ``getDataDir/<filename>``,
108 which should be a YAML import/export file.
109 """
110 from ...transfers import YamlRepoImportBackend
111 with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
112 backend = YamlRepoImportBackend(stream, registry)
113 backend.register()
114 backend.load(datastore=None)
116 def testOpaque(self):
117 """Tests for `Registry.registerOpaqueTable`,
118 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
119 `Registry.deleteOpaqueData`.
120 """
121 registry = self.makeRegistry()
122 table = "opaque_table_for_testing"
123 registry.registerOpaqueTable(
124 table,
125 spec=ddl.TableSpec(
126 fields=[
127 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
128 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
129 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
130 ],
131 )
132 )
133 rows = [
134 {"id": 1, "name": "one", "count": None},
135 {"id": 2, "name": "two", "count": 5},
136 {"id": 3, "name": "three", "count": 6},
137 ]
138 registry.insertOpaqueData(table, *rows)
139 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
140 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
141 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
142 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
143 registry.deleteOpaqueData(table, id=3)
144 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
145 registry.deleteOpaqueData(table)
146 self.assertEqual([], list(registry.fetchOpaqueData(table)))
148 def testDatasetType(self):
149 """Tests for `Registry.registerDatasetType` and
150 `Registry.getDatasetType`.
151 """
152 registry = self.makeRegistry()
153 # Check valid insert
154 datasetTypeName = "test"
155 storageClass = StorageClass("testDatasetType")
156 registry.storageClasses.registerStorageClass(storageClass)
157 dimensions = registry.dimensions.extract(("instrument", "visit"))
158 differentDimensions = registry.dimensions.extract(("instrument", "patch"))
159 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
160 # Inserting for the first time should return True
161 self.assertTrue(registry.registerDatasetType(inDatasetType))
162 outDatasetType1 = registry.getDatasetType(datasetTypeName)
163 self.assertEqual(outDatasetType1, inDatasetType)
165 # Re-inserting should work
166 self.assertFalse(registry.registerDatasetType(inDatasetType))
167 # Except when they are not identical
168 with self.assertRaises(ConflictingDefinitionError):
169 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
170 registry.registerDatasetType(nonIdenticalDatasetType)
172 # Template can be None
173 datasetTypeName = "testNoneTemplate"
174 storageClass = StorageClass("testDatasetType2")
175 registry.storageClasses.registerStorageClass(storageClass)
176 dimensions = registry.dimensions.extract(("instrument", "visit"))
177 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
178 registry.registerDatasetType(inDatasetType)
179 outDatasetType2 = registry.getDatasetType(datasetTypeName)
180 self.assertEqual(outDatasetType2, inDatasetType)
182 allTypes = set(registry.queryDatasetTypes())
183 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
185 def testDimensions(self):
186 """Tests for `Registry.insertDimensionData`,
187 `Registry.syncDimensionData`, and `Registry.expandDataId`.
188 """
189 registry = self.makeRegistry()
190 dimensionName = "instrument"
191 dimension = registry.dimensions[dimensionName]
192 dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
193 "class_name": "lsst.obs.base.Instrument"}
194 registry.insertDimensionData(dimensionName, dimensionValue)
195 # Inserting the same value twice should fail
196 with self.assertRaises(sqlalchemy.exc.IntegrityError):
197 registry.insertDimensionData(dimensionName, dimensionValue)
198 # expandDataId should retrieve the record we just inserted
199 self.assertEqual(
200 registry.expandDataId(
201 instrument="DummyCam",
202 graph=dimension.graph
203 ).records[dimensionName].toDict(),
204 dimensionValue
205 )
206 # expandDataId should raise if there is no record with the given ID.
207 with self.assertRaises(LookupError):
208 registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
209 # band doesn't have a table; insert should fail.
210 with self.assertRaises(TypeError):
211 registry.insertDimensionData("band", {"band": "i"})
212 dimensionName2 = "physical_filter"
213 dimension2 = registry.dimensions[dimensionName2]
214 dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
215 # Missing required dependency ("instrument") should fail
216 with self.assertRaises(KeyError):
217 registry.insertDimensionData(dimensionName2, dimensionValue2)
218 # Adding required dependency should fix the failure
219 dimensionValue2["instrument"] = "DummyCam"
220 registry.insertDimensionData(dimensionName2, dimensionValue2)
221 # expandDataId should retrieve the record we just inserted.
222 self.assertEqual(
223 registry.expandDataId(
224 instrument="DummyCam", physical_filter="DummyCam_i",
225 graph=dimension2.graph
226 ).records[dimensionName2].toDict(),
227 dimensionValue2
228 )
229 # Use syncDimensionData to insert a new record successfully.
230 dimensionName3 = "detector"
231 dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
232 "name_in_raft": "zero", "purpose": "SCIENCE"}
233 self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
234 # Sync that again. Note that one field ("raft") is NULL, and that
235 # should be okay.
236 self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
237 # Now try that sync with the same primary key but a different value.
238 # This should fail.
239 with self.assertRaises(ConflictingDefinitionError):
240 registry.syncDimensionData(
241 dimensionName3,
242 {"instrument": "DummyCam", "id": 1, "full_name": "one",
243 "name_in_raft": "four", "purpose": "SCIENCE"}
244 )
246 @unittest.skipIf(np is None, "numpy not available.")
247 def testNumpyDataId(self):
248 """Test that we can use a numpy int in a dataId."""
249 registry = self.makeRegistry()
250 dimensionEntries = [
251 ("instrument", {"instrument": "DummyCam"}),
252 ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
253 # Using an np.int64 here fails unless Records.fromDict is also
254 # patched to look for numbers.Integral
255 ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
256 ]
257 for args in dimensionEntries:
258 registry.insertDimensionData(*args)
260 # Try a normal integer and something that looks like an int but
261 # is not.
262 for visit_id in (42, np.int64(42)):
263 with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
264 expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
265 self.assertEqual(expanded["visit"], int(visit_id))
266 self.assertIsInstance(expanded["visit"], int)
268 def testDataIdRelationships(self):
269 """Test that `Registry.expandDataId` raises an exception when the given
270 keys are inconsistent.
271 """
272 registry = self.makeRegistry()
273 self.loadData(registry, "base.yaml")
274 # Insert a few more dimension records for the next test.
275 registry.insertDimensionData(
276 "exposure",
277 {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
278 )
279 registry.insertDimensionData(
280 "exposure",
281 {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
282 )
283 registry.insertDimensionData(
284 "visit_system",
285 {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
286 )
287 registry.insertDimensionData(
288 "visit",
289 {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
290 )
291 registry.insertDimensionData(
292 "visit_definition",
293 {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
294 )
295 with self.assertRaises(InconsistentDataIdError):
296 registry.expandDataId(
297 {"instrument": "Cam1", "visit": 1, "exposure": 2},
298 )
300 def testDataset(self):
301 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
302 and `Registry.removeDatasets`.
303 """
304 registry = self.makeRegistry()
305 self.loadData(registry, "base.yaml")
306 run = "test"
307 registry.registerRun(run)
308 datasetType = registry.getDatasetType("bias")
309 dataId = {"instrument": "Cam1", "detector": 2}
310 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
311 outRef = registry.getDataset(ref.id)
312 self.assertIsNotNone(ref.id)
313 self.assertEqual(ref, outRef)
314 with self.assertRaises(ConflictingDefinitionError):
315 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
316 registry.removeDatasets([ref])
317 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
319 def testFindDataset(self):
320 """Tests for `Registry.findDataset`.
321 """
322 registry = self.makeRegistry()
323 self.loadData(registry, "base.yaml")
324 run = "test"
325 datasetType = registry.getDatasetType("bias")
326 dataId = {"instrument": "Cam1", "detector": 4}
327 registry.registerRun(run)
328 inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
329 outputRef = registry.findDataset(datasetType, dataId, collections=[run])
330 self.assertEqual(outputRef, inputRef)
331 # Check that retrieval with invalid dataId raises
332 with self.assertRaises(LookupError):
333 dataId = {"instrument": "Cam1"} # no detector
334 registry.findDataset(datasetType, dataId, collections=run)
335 # Check that different dataIds match to different datasets
336 dataId1 = {"instrument": "Cam1", "detector": 1}
337 inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
338 dataId2 = {"instrument": "Cam1", "detector": 2}
339 inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
340 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
341 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
342 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
343 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
344 # Check that requesting a non-existing dataId returns None
345 nonExistingDataId = {"instrument": "Cam1", "detector": 3}
346 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
348 def testRemoveDatasetTypeSuccess(self):
349 """Test that Registry.removeDatasetType works when there are no
350 datasets of that type present.
351 """
352 registry = self.makeRegistry()
353 self.loadData(registry, "base.yaml")
354 registry.removeDatasetType("flat")
355 with self.assertRaises(KeyError):
356 registry.getDatasetType("flat")
358 def testRemoveDatasetTypeFailure(self):
359 """Test that Registry.removeDatasetType raises when there are datasets
360 of that type present or if the dataset type is for a component.
361 """
362 registry = self.makeRegistry()
363 self.loadData(registry, "base.yaml")
364 self.loadData(registry, "datasets.yaml")
365 with self.assertRaises(OrphanedRecordError):
366 registry.removeDatasetType("flat")
367 with self.assertRaises(ValueError):
368 registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))
370 def testDatasetTypeComponentQueries(self):
371 """Test component options when querying for dataset types.
372 """
373 registry = self.makeRegistry()
374 self.loadData(registry, "base.yaml")
375 self.loadData(registry, "datasets.yaml")
376 # Test querying for dataset types with different inputs.
377 # First query for all dataset types; components should only be included
378 # when components=True.
379 self.assertEqual(
380 {"bias", "flat"},
381 NamedValueSet(registry.queryDatasetTypes()).names
382 )
383 self.assertEqual(
384 {"bias", "flat"},
385 NamedValueSet(registry.queryDatasetTypes(components=False)).names
386 )
387 self.assertLess(
388 {"bias", "flat", "bias.wcs", "flat.photoCalib"},
389 NamedValueSet(registry.queryDatasetTypes(components=True)).names
390 )
391 # Use a pattern that can match either parent or components. Again,
392 # components are only returned if components=True.
393 self.assertEqual(
394 {"bias"},
395 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
396 )
397 self.assertEqual(
398 {"bias"},
399 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
400 )
401 self.assertLess(
402 {"bias", "bias.wcs"},
403 NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
404 )
405 # This pattern matches only a component. In this case we also return
406 # that component dataset type if components=None.
407 self.assertEqual(
408 {"bias.wcs"},
409 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
410 )
411 self.assertEqual(
412 set(),
413 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
414 )
415 self.assertEqual(
416 {"bias.wcs"},
417 NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
418 )
419 # Add a dataset type using a StorageClass that we'll then remove; check
420 # that this does not affect our ability to query for dataset types
421 # (though it will warn).
422 tempStorageClass = StorageClass(
423 name="TempStorageClass",
424 components={"data", registry.storageClasses.getStorageClass("StructuredDataDict")}
425 )
426 registry.storageClasses.registerStorageClass(tempStorageClass)
427 datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
428 universe=registry.dimensions)
429 registry.registerDatasetType(datasetType)
430 registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
431 datasetType._storageClass = None
432 del tempStorageClass
433 # Querying for all dataset types, including components, should include
434 # at least all non-component dataset types (and I don't want to
435 # enumerate all of the Exposure components for bias and flat here).
436 with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
437 everything = NamedValueSet(registry.queryDatasetTypes(components=True))
438 self.assertIn("TempStorageClass", cm.output[0])
439 self.assertLess({"bias", "flat", "temporary"}, everything.names)
440 # It should not include "temporary.columns", because we tried to remove
441 # the storage class that would tell it about that. So if the next line
442 # fails (i.e. "temporary.columns" _is_ in everything.names), it means
443 # this part of the test isn't doing anything, because the _unregister
444 # call about isn't simulating the real-life case we want it to
445 # simulate, in which different versions of daf_butler in entirely
446 # different Python processes interact with the same repo.
447 self.assertNotIn("temporary.data", everything.names)
448 # Query for dataset types that start with "temp". This should again
449 # not include the component, and also not fail.
450 with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
451 startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
452 self.assertIn("TempStorageClass", cm.output[0])
453 self.assertEqual({"temporary"}, startsWithTemp.names)
455 def testComponentLookups(self):
456 """Test searching for component datasets via their parents.
457 """
458 registry = self.makeRegistry()
459 self.loadData(registry, "base.yaml")
460 self.loadData(registry, "datasets.yaml")
461 # Test getting the child dataset type (which does still exist in the
462 # Registry), and check for consistency with
463 # DatasetRef.makeComponentRef.
464 collection = "imported_g"
465 parentType = registry.getDatasetType("bias")
466 childType = registry.getDatasetType("bias.wcs")
467 parentRefResolved = registry.findDataset(parentType, collections=collection,
468 instrument="Cam1", detector=1)
469 self.assertIsInstance(parentRefResolved, DatasetRef)
470 self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
471 # Search for a single dataset with findDataset.
472 childRef1 = registry.findDataset("bias.wcs", collections=collection,
473 dataId=parentRefResolved.dataId)
474 self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
475 # Search for detector data IDs constrained by component dataset
476 # existence with queryDataIds.
477 dataIds = registry.queryDataIds(
478 ["detector"],
479 datasets=["bias.wcs"],
480 collections=collection,
481 ).toSet()
482 self.assertEqual(
483 dataIds,
484 DataCoordinateSet(
485 {
486 DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
487 for d in (1, 2, 3)
488 },
489 parentType.dimensions,
490 )
491 )
492 # Search for multiple datasets of a single type with queryDatasets.
493 childRefs2 = set(registry.queryDatasets(
494 "bias.wcs",
495 collections=collection,
496 ))
497 self.assertEqual(
498 {ref.unresolved() for ref in childRefs2},
499 {DatasetRef(childType, dataId) for dataId in dataIds}
500 )
502 def testCollections(self):
503 """Tests for registry methods that manage collections.
504 """
505 registry = self.makeRegistry()
506 self.loadData(registry, "base.yaml")
507 self.loadData(registry, "datasets.yaml")
508 run1 = "imported_g"
509 run2 = "imported_r"
510 # Test setting a collection docstring after it has been created.
511 registry.setCollectionDocumentation(run1, "doc for run1")
512 self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
513 registry.setCollectionDocumentation(run1, None)
514 self.assertIsNone(registry.getCollectionDocumentation(run1))
515 datasetType = "bias"
516 # Find some datasets via their run's collection.
517 dataId1 = {"instrument": "Cam1", "detector": 1}
518 ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
519 self.assertIsNotNone(ref1)
520 dataId2 = {"instrument": "Cam1", "detector": 2}
521 ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
522 self.assertIsNotNone(ref2)
523 # Associate those into a new collection,then look for them there.
524 tag1 = "tag1"
525 registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
526 self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
527 registry.associate(tag1, [ref1, ref2])
528 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
529 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
530 # Disassociate one and verify that we can't it there anymore...
531 registry.disassociate(tag1, [ref1])
532 self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
533 # ...but we can still find ref2 in tag1, and ref1 in the run.
534 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
535 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
536 collections = set(registry.queryCollections())
537 self.assertEqual(collections, {run1, run2, tag1})
538 # Associate both refs into tag1 again; ref2 is already there, but that
539 # should be a harmless no-op.
540 registry.associate(tag1, [ref1, ref2])
541 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
542 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
543 # Get a different dataset (from a different run) that has the same
544 # dataset type and data ID as ref2.
545 ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
546 self.assertNotEqual(ref2, ref2b)
547 # Attempting to associate that into tag1 should be an error.
548 with self.assertRaises(ConflictingDefinitionError):
549 registry.associate(tag1, [ref2b])
550 # That error shouldn't have messed up what we had before.
551 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
552 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
553 # Attempt to associate the conflicting dataset again, this time with
554 # a dataset that isn't in the collection and won't cause a conflict.
555 # Should also fail without modifying anything.
556 dataId3 = {"instrument": "Cam1", "detector": 3}
557 ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
558 with self.assertRaises(ConflictingDefinitionError):
559 registry.associate(tag1, [ref3, ref2b])
560 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
561 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
562 self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
563 # Register a chained collection that searches [tag1, run2]
564 chain1 = "chain1"
565 registry.registerCollection(chain1, type=CollectionType.CHAINED)
566 self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
567 # Chained collection exists, but has no collections in it.
568 self.assertFalse(registry.getCollectionChain(chain1))
569 # If we query for all collections, we should get the chained collection
570 # only if we don't ask to flatten it (i.e. yield only its children).
571 self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
572 self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
573 # Attempt to set its child collections to something circular; that
574 # should fail.
575 with self.assertRaises(ValueError):
576 registry.setCollectionChain(chain1, [tag1, chain1])
577 # Add the child collections.
578 registry.setCollectionChain(chain1, [tag1, run2])
579 self.assertEqual(
580 list(registry.getCollectionChain(chain1)),
581 [tag1, run2]
582 )
583 # Searching for dataId1 or dataId2 in the chain should return ref1 and
584 # ref2, because both are in tag1.
585 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
586 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
587 # Now disassociate ref2 from tag1. The search (for bias) with
588 # dataId2 in chain1 should then:
589 # 1. not find it in tag1
590 # 2. find a different dataset in run2
591 registry.disassociate(tag1, [ref2])
592 ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
593 self.assertNotEqual(ref2b, ref2)
594 self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
595 # Define a new chain so we can test recursive chains.
596 chain2 = "chain2"
597 registry.registerCollection(chain2, type=CollectionType.CHAINED)
598 registry.setCollectionChain(chain2, [run2, chain1])
599 # Query for collections matching a regex.
600 self.assertCountEqual(
601 list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
602 ["imported_r", "imported_g"]
603 )
604 # Query for collections matching a regex or an explicit str.
605 self.assertCountEqual(
606 list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
607 ["imported_r", "imported_g", "chain1"]
608 )
609 # Search for bias with dataId1 should find it via tag1 in chain2,
610 # recursing, because is not in run1.
611 self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
612 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
613 # Search for bias with dataId2 should find it in run2 (ref2b).
614 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
615 # Search for a flat that is in run2. That should not be found
616 # at the front of chain2, because of the restriction to bias
617 # on run2 there, but it should be found in at the end of chain1.
618 dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
619 ref4 = registry.findDataset("flat", dataId4, collections=run2)
620 self.assertIsNotNone(ref4)
621 self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
622 # Deleting a collection that's part of a CHAINED collection is not
623 # allowed, and is exception-safe.
624 with self.assertRaises(Exception):
625 registry.removeCollection(run2)
626 self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
627 with self.assertRaises(Exception):
628 registry.removeCollection(chain1)
629 self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
630 # Actually remove chain2, test that it's gone by asking for its type.
631 registry.removeCollection(chain2)
632 with self.assertRaises(MissingCollectionError):
633 registry.getCollectionType(chain2)
634 # Actually remove run2 and chain1, which should work now.
635 registry.removeCollection(chain1)
636 registry.removeCollection(run2)
637 with self.assertRaises(MissingCollectionError):
638 registry.getCollectionType(run2)
639 with self.assertRaises(MissingCollectionError):
640 registry.getCollectionType(chain1)
641 # Remove tag1 as well, just to test that we can remove TAGGED
642 # collections.
643 registry.removeCollection(tag1)
644 with self.assertRaises(MissingCollectionError):
645 registry.getCollectionType(tag1)
647 def testBasicTransaction(self):
648 """Test that all operations within a single transaction block are
649 rolled back if an exception propagates out of the block.
650 """
651 registry = self.makeRegistry()
652 storageClass = StorageClass("testDatasetType")
653 registry.storageClasses.registerStorageClass(storageClass)
654 with registry.transaction():
655 registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
656 with self.assertRaises(ValueError):
657 with registry.transaction():
658 registry.insertDimensionData("instrument", {"name": "Cam2"})
659 raise ValueError("Oops, something went wrong")
660 # Cam1 should exist
661 self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
662 # But Cam2 and Cam3 should both not exist
663 with self.assertRaises(LookupError):
664 registry.expandDataId(instrument="Cam2")
665 with self.assertRaises(LookupError):
666 registry.expandDataId(instrument="Cam3")
668 def testNestedTransaction(self):
669 """Test that operations within a transaction block are not rolled back
670 if an exception propagates out of an inner transaction block and is
671 then caught.
672 """
673 registry = self.makeRegistry()
674 dimension = registry.dimensions["instrument"]
675 dataId1 = {"instrument": "DummyCam"}
676 dataId2 = {"instrument": "DummyCam2"}
677 checkpointReached = False
678 with registry.transaction():
679 # This should be added and (ultimately) committed.
680 registry.insertDimensionData(dimension, dataId1)
681 with self.assertRaises(sqlalchemy.exc.IntegrityError):
682 with registry.transaction(savepoint=True):
683 # This does not conflict, and should succeed (but not
684 # be committed).
685 registry.insertDimensionData(dimension, dataId2)
686 checkpointReached = True
687 # This should conflict and raise, triggerring a rollback
688 # of the previous insertion within the same transaction
689 # context, but not the original insertion in the outer
690 # block.
691 registry.insertDimensionData(dimension, dataId1)
692 self.assertTrue(checkpointReached)
693 self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
694 with self.assertRaises(LookupError):
695 registry.expandDataId(dataId2, graph=dimension.graph)
697 def testInstrumentDimensions(self):
698 """Test queries involving only instrument dimensions, with no joins to
699 skymap."""
700 registry = self.makeRegistry()
702 # need a bunch of dimensions and datasets for test
703 registry.insertDimensionData(
704 "instrument",
705 dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
706 )
707 registry.insertDimensionData(
708 "physical_filter",
709 dict(instrument="DummyCam", name="dummy_r", band="r"),
710 dict(instrument="DummyCam", name="dummy_i", band="i"),
711 )
712 registry.insertDimensionData(
713 "detector",
714 *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
715 )
716 registry.insertDimensionData(
717 "visit_system",
718 dict(instrument="DummyCam", id=1, name="default"),
719 )
720 registry.insertDimensionData(
721 "visit",
722 dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
723 dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
724 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
725 )
726 registry.insertDimensionData(
727 "exposure",
728 dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
729 dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
730 dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
731 dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
732 dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
733 dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
734 )
735 registry.insertDimensionData(
736 "visit_definition",
737 dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
738 dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
739 dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
740 dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
741 dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
742 dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
743 )
744 # dataset types
745 run1 = "test1_r"
746 run2 = "test2_r"
747 tagged2 = "test2_t"
748 registry.registerRun(run1)
749 registry.registerRun(run2)
750 registry.registerCollection(tagged2)
751 storageClass = StorageClass("testDataset")
752 registry.storageClasses.registerStorageClass(storageClass)
753 rawType = DatasetType(name="RAW",
754 dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
755 storageClass=storageClass)
756 registry.registerDatasetType(rawType)
757 calexpType = DatasetType(name="CALEXP",
758 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
759 storageClass=storageClass)
760 registry.registerDatasetType(calexpType)
762 # add pre-existing datasets
763 for exposure in (100, 101, 110, 111):
764 for detector in (1, 2, 3):
765 # note that only 3 of 5 detectors have datasets
766 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
767 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
768 # exposures 100 and 101 appear in both run1 and tagged2.
769 # 100 has different datasets in the different collections
770 # 101 has the same dataset in both collections.
771 if exposure == 100:
772 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
773 if exposure in (100, 101):
774 registry.associate(tagged2, [ref])
775 # Add pre-existing datasets to tagged2.
776 for exposure in (200, 201):
777 for detector in (3, 4, 5):
778 # note that only 3 of 5 detectors have datasets
779 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
780 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
781 registry.associate(tagged2, [ref])
783 dimensions = DimensionGraph(
784 registry.dimensions,
785 dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
786 )
787 # Test that single dim string works as well as list of str
788 rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
789 rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
790 self.assertEqual(rows, rowsI)
791 # with empty expression
792 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
793 self.assertEqual(len(rows), 4*3) # 4 exposures times 3 detectors
794 for dataId in rows:
795 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
796 packer1 = registry.dimensions.makePacker("visit_detector", dataId)
797 packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
798 self.assertEqual(packer1.unpack(packer1.pack(dataId)),
799 DataCoordinate.standardize(dataId, graph=packer1.dimensions))
800 self.assertEqual(packer2.unpack(packer2.pack(dataId)),
801 DataCoordinate.standardize(dataId, graph=packer2.dimensions))
802 self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
803 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
804 (100, 101, 110, 111))
805 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
806 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
808 # second collection
809 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
810 self.assertEqual(len(rows), 4*3) # 4 exposures times 3 detectors
811 for dataId in rows:
812 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
813 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
814 (100, 101, 200, 201))
815 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
816 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
818 # with two input datasets
819 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
820 self.assertEqual(len(set(rows)), 6*3) # 6 exposures times 3 detectors; set needed to de-dupe
821 for dataId in rows:
822 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
823 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
824 (100, 101, 110, 111, 200, 201))
825 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
826 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
828 # limit to single visit
829 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
830 where="visit = 10", instrument="DummyCam").toSet()
831 self.assertEqual(len(rows), 2*3) # 2 exposures times 3 detectors
832 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
833 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
834 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
836 # more limiting expression, using link names instead of Table.column
837 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
838 where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
839 self.assertEqual(len(rows), 2*2) # 2 exposures times 2 detectors
840 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
841 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
842 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))
844 # expression excludes everything
845 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
846 where="visit > 1000", instrument="DummyCam").toSet()
847 self.assertEqual(len(rows), 0)
849 # Selecting by physical_filter, this is not in the dimensions, but it
850 # is a part of the full expression so it should work too.
851 rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
852 where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
853 self.assertEqual(len(rows), 2*3) # 2 exposures times 3 detectors
854 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
855 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
856 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
858 def testSkyMapDimensions(self):
859 """Tests involving only skymap dimensions, no joins to instrument."""
860 registry = self.makeRegistry()
862 # need a bunch of dimensions and datasets for test, we want
863 # "band" in the test so also have to add physical_filter
864 # dimensions
865 registry.insertDimensionData(
866 "instrument",
867 dict(instrument="DummyCam")
868 )
869 registry.insertDimensionData(
870 "physical_filter",
871 dict(instrument="DummyCam", name="dummy_r", band="r"),
872 dict(instrument="DummyCam", name="dummy_i", band="i"),
873 )
874 registry.insertDimensionData(
875 "skymap",
876 dict(name="DummyMap", hash="sha!".encode("utf8"))
877 )
878 for tract in range(10):
879 registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
880 registry.insertDimensionData(
881 "patch",
882 *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
883 for patch in range(10)]
884 )
886 # dataset types
887 run = "test"
888 registry.registerRun(run)
889 storageClass = StorageClass("testDataset")
890 registry.storageClasses.registerStorageClass(storageClass)
891 calexpType = DatasetType(name="deepCoadd_calexp",
892 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
893 "band")),
894 storageClass=storageClass)
895 registry.registerDatasetType(calexpType)
896 mergeType = DatasetType(name="deepCoadd_mergeDet",
897 dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
898 storageClass=storageClass)
899 registry.registerDatasetType(mergeType)
900 measType = DatasetType(name="deepCoadd_meas",
901 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
902 "band")),
903 storageClass=storageClass)
904 registry.registerDatasetType(measType)
906 dimensions = DimensionGraph(
907 registry.dimensions,
908 dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
909 | measType.dimensions.required)
910 )
912 # add pre-existing datasets
913 for tract in (1, 3, 5):
914 for patch in (2, 4, 6, 7):
915 dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
916 registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
917 for aFilter in ("i", "r"):
918 dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
919 registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
921 # with empty expression
922 rows = registry.queryDataIds(dimensions,
923 datasets=[calexpType, mergeType], collections=run).toSet()
924 self.assertEqual(len(rows), 3*4*2) # 4 tracts x 4 patches x 2 filters
925 for dataId in rows:
926 self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
927 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
928 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
929 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))
931 # limit to 2 tracts and 2 patches
932 rows = registry.queryDataIds(dimensions,
933 datasets=[calexpType, mergeType], collections=run,
934 where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
935 self.assertEqual(len(rows), 2*2*2) # 2 tracts x 2 patches x 2 filters
936 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
937 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
938 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))
940 # limit to single filter
941 rows = registry.queryDataIds(dimensions,
942 datasets=[calexpType, mergeType], collections=run,
943 where="band = 'i'").toSet()
944 self.assertEqual(len(rows), 3*4*1) # 4 tracts x 4 patches x 2 filters
945 self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
946 self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
947 self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))
949 # expression excludes everything, specifying non-existing skymap is
950 # not a fatal error, it's operator error
951 rows = registry.queryDataIds(dimensions,
952 datasets=[calexpType, mergeType], collections=run,
953 where="skymap = 'Mars'").toSet()
954 self.assertEqual(len(rows), 0)
956 def testSpatialJoin(self):
957 """Test queries that involve spatial overlap joins.
958 """
959 registry = self.makeRegistry()
960 self.loadData(registry, "hsc-rc2-subset.yaml")
962 # Dictionary of spatial DatabaseDimensionElements, keyed by the name of
963 # the TopologicalFamily they belong to. We'll relate all elements in
964 # each family to all of the elements in each other family.
965 families = defaultdict(set)
966 # Dictionary of {element.name: {dataId: region}}.
967 regions = {}
968 for element in registry.dimensions.getDatabaseElements():
969 if element.spatial is not None:
970 families[element.spatial.name].add(element)
971 regions[element.name] = {
972 record.dataId: record.region for record in registry.queryDimensionRecords(element)
973 }
975 # If this check fails, it's not necessarily a problem - it may just be
976 # a reasonable change to the default dimension definitions - but the
977 # test below depends on there being more than one family to do anything
978 # useful.
979 self.assertEqual(len(families), 2)
981 # Overlap DatabaseDimensionElements with each other.
982 for family1, family2 in itertools.combinations(families, 2):
983 for element1, element2 in itertools.product(families[family1], families[family2]):
984 graph = DimensionGraph.union(element1.graph, element2.graph)
985 # Construct expected set of overlapping data IDs via a
986 # brute-force comparison of the regions we've already fetched.
987 expected = {
988 DataCoordinate.standardize(
989 {**dataId1.byName(), **dataId2.byName()},
990 graph=graph
991 )
992 for (dataId1, region1), (dataId2, region2)
993 in itertools.product(regions[element1.name].items(), regions[element2.name].items())
994 if not region1.isDisjointFrom(region2)
995 }
996 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
997 queried = set(registry.queryDataIds(graph))
998 self.assertEqual(expected, queried)
1000 # Overlap each DatabaseDimensionElement with the commonSkyPix system.
1001 commonSkyPix = registry.dimensions.commonSkyPix
1002 for elementName, regions in regions.items():
1003 graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
1004 expected = set()
1005 for dataId, region in regions.items():
1006 for begin, end in commonSkyPix.pixelization.envelope(region):
1007 expected.update(
1008 DataCoordinate.standardize(
1009 {commonSkyPix.name: index, **dataId.byName()},
1010 graph=graph
1011 )
1012 for index in range(begin, end)
1013 )
1014 self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
1015 queried = set(registry.queryDataIds(graph))
1016 self.assertEqual(expected, queried)
1018 def testAbstractQuery(self):
1019 """Test that we can run a query that just lists the known
1020 bands. This is tricky because band is
1021 backed by a query against physical_filter.
1022 """
1023 registry = self.makeRegistry()
1024 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1025 registry.insertDimensionData(
1026 "physical_filter",
1027 dict(instrument="DummyCam", name="dummy_i", band="i"),
1028 dict(instrument="DummyCam", name="dummy_i2", band="i"),
1029 dict(instrument="DummyCam", name="dummy_r", band="r"),
1030 )
1031 rows = registry.queryDataIds(["band"]).toSet()
1032 self.assertCountEqual(
1033 rows,
1034 [DataCoordinate.standardize(band="i", universe=registry.dimensions),
1035 DataCoordinate.standardize(band="r", universe=registry.dimensions)]
1036 )
1038 def testAttributeManager(self):
1039 """Test basic functionality of attribute manager.
1040 """
1041 # number of attributes with schema versions in a fresh database,
1042 # 6 managers with 3 records per manager, plus config for dimensions
1043 VERSION_COUNT = 6 * 3 + 1
1045 registry = self.makeRegistry()
1046 attributes = registry._managers.attributes
1048 # check what get() returns for non-existing key
1049 self.assertIsNone(attributes.get("attr"))
1050 self.assertEqual(attributes.get("attr", ""), "")
1051 self.assertEqual(attributes.get("attr", "Value"), "Value")
1052 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1054 # cannot store empty key or value
1055 with self.assertRaises(ValueError):
1056 attributes.set("", "value")
1057 with self.assertRaises(ValueError):
1058 attributes.set("attr", "")
1060 # set value of non-existing key
1061 attributes.set("attr", "value")
1062 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1063 self.assertEqual(attributes.get("attr"), "value")
1065 # update value of existing key
1066 with self.assertRaises(ButlerAttributeExistsError):
1067 attributes.set("attr", "value2")
1069 attributes.set("attr", "value2", force=True)
1070 self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
1071 self.assertEqual(attributes.get("attr"), "value2")
1073 # delete existing key
1074 self.assertTrue(attributes.delete("attr"))
1075 self.assertEqual(len(list(attributes.items())), VERSION_COUNT)
1077 # delete non-existing key
1078 self.assertFalse(attributes.delete("non-attr"))
1080 # store bunch of keys and get the list back
1081 data = [
1082 ("version.core", "1.2.3"),
1083 ("version.dimensions", "3.2.1"),
1084 ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
1085 ]
1086 for key, value in data:
1087 attributes.set(key, value)
1088 items = dict(attributes.items())
1089 for key, value in data:
1090 self.assertEqual(items[key], value)
1092 def testQueryDatasetsDeduplication(self):
1093 """Test that the findFirst option to queryDatasets selects datasets
1094 from collections in the order given".
1095 """
1096 registry = self.makeRegistry()
1097 self.loadData(registry, "base.yaml")
1098 self.loadData(registry, "datasets.yaml")
1099 self.assertCountEqual(
1100 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
1101 [
1102 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1103 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1104 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1105 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1106 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1107 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1108 ]
1109 )
1110 self.assertCountEqual(
1111 list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
1112 findFirst=True)),
1113 [
1114 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1115 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
1116 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
1117 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1118 ]
1119 )
1120 self.assertCountEqual(
1121 list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
1122 findFirst=True)),
1123 [
1124 registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1125 registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
1126 registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
1127 registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1128 ]
1129 )
1131 def testQueryResults(self):
1132 """Test querying for data IDs and then manipulating the QueryResults
1133 object returned to perform other queries.
1134 """
1135 registry = self.makeRegistry()
1136 self.loadData(registry, "base.yaml")
1137 self.loadData(registry, "datasets.yaml")
1138 bias = registry.getDatasetType("bias")
1139 flat = registry.getDatasetType("flat")
1140 # Obtain expected results from methods other than those we're testing
1141 # here. That includes:
1142 # - the dimensions of the data IDs we want to query:
1143 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1144 # - the dimensions of some other data IDs we'll extract from that:
1145 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1146 # - the data IDs we expect to obtain from the first queries:
1147 expectedDataIds = DataCoordinateSet(
1148 {
1149 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
1150 universe=registry.dimensions)
1151 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1152 },
1153 graph=expectedGraph,
1154 hasFull=False,
1155 hasRecords=False,
1156 )
1157 # - the flat datasets we expect to find from those data IDs, in just
1158 # one collection (so deduplication is irrelevant):
1159 expectedFlats = [
1160 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
1161 collections="imported_r"),
1162 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
1163 collections="imported_r"),
1164 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
1165 collections="imported_r"),
1166 ]
1167 # - the data IDs we expect to extract from that:
1168 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1169 # - the bias datasets we expect to find from those data IDs, after we
1170 # subset-out the physical_filter dimension, both with duplicates:
1171 expectedAllBiases = [
1172 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1173 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1174 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1175 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1176 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1177 ]
1178 # - ...and without duplicates:
1179 expectedDeduplicatedBiases = [
1180 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1181 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1182 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1183 ]
1184 # Test against those expected results, using a "lazy" query for the
1185 # data IDs (which re-executes that query each time we use it to do
1186 # something new).
1187 dataIds = registry.queryDataIds(
1188 ["detector", "physical_filter"],
1189 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1190 instrument="Cam1",
1191 )
1192 self.assertEqual(dataIds.graph, expectedGraph)
1193 self.assertEqual(dataIds.toSet(), expectedDataIds)
1194 self.assertCountEqual(
1195 list(
1196 dataIds.findDatasets(
1197 flat,
1198 collections=["imported_r"],
1199 )
1200 ),
1201 expectedFlats,
1202 )
1203 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1204 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1205 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1206 self.assertCountEqual(
1207 list(
1208 subsetDataIds.findDatasets(
1209 bias,
1210 collections=["imported_r", "imported_g"],
1211 findFirst=False
1212 )
1213 ),
1214 expectedAllBiases
1215 )
1216 self.assertCountEqual(
1217 list(
1218 subsetDataIds.findDatasets(
1219 bias,
1220 collections=["imported_r", "imported_g"],
1221 findFirst=True
1222 )
1223 ), expectedDeduplicatedBiases
1224 )
1225 # Materialize the bias dataset queries (only) by putting the results
1226 # into temporary tables, then repeat those tests.
1227 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1228 findFirst=False).materialize() as biases:
1229 self.assertCountEqual(list(biases), expectedAllBiases)
1230 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1231 findFirst=True).materialize() as biases:
1232 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1233 # Materialize the data ID subset query, but not the dataset queries.
1234 with subsetDataIds.materialize() as subsetDataIds:
1235 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1236 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1237 self.assertCountEqual(
1238 list(
1239 subsetDataIds.findDatasets(
1240 bias,
1241 collections=["imported_r", "imported_g"],
1242 findFirst=False
1243 )
1244 ),
1245 expectedAllBiases
1246 )
1247 self.assertCountEqual(
1248 list(
1249 subsetDataIds.findDatasets(
1250 bias,
1251 collections=["imported_r", "imported_g"],
1252 findFirst=True
1253 )
1254 ), expectedDeduplicatedBiases
1255 )
1256 # Materialize the dataset queries, too.
1257 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1258 findFirst=False).materialize() as biases:
1259 self.assertCountEqual(list(biases), expectedAllBiases)
1260 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1261 findFirst=True).materialize() as biases:
1262 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1263 # Materialize the original query, but none of the follow-up queries.
1264 with dataIds.materialize() as dataIds:
1265 self.assertEqual(dataIds.graph, expectedGraph)
1266 self.assertEqual(dataIds.toSet(), expectedDataIds)
1267 self.assertCountEqual(
1268 list(
1269 dataIds.findDatasets(
1270 flat,
1271 collections=["imported_r"],
1272 )
1273 ),
1274 expectedFlats,
1275 )
1276 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1277 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1278 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1279 self.assertCountEqual(
1280 list(
1281 subsetDataIds.findDatasets(
1282 bias,
1283 collections=["imported_r", "imported_g"],
1284 findFirst=False
1285 )
1286 ),
1287 expectedAllBiases
1288 )
1289 self.assertCountEqual(
1290 list(
1291 subsetDataIds.findDatasets(
1292 bias,
1293 collections=["imported_r", "imported_g"],
1294 findFirst=True
1295 )
1296 ), expectedDeduplicatedBiases
1297 )
1298 # Materialize just the bias dataset queries.
1299 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1300 findFirst=False).materialize() as biases:
1301 self.assertCountEqual(list(biases), expectedAllBiases)
1302 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1303 findFirst=True).materialize() as biases:
1304 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1305 # Materialize the subset data ID query, but not the dataset
1306 # queries.
1307 with subsetDataIds.materialize() as subsetDataIds:
1308 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1309 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1310 self.assertCountEqual(
1311 list(
1312 subsetDataIds.findDatasets(
1313 bias,
1314 collections=["imported_r", "imported_g"],
1315 findFirst=False
1316 )
1317 ),
1318 expectedAllBiases
1319 )
1320 self.assertCountEqual(
1321 list(
1322 subsetDataIds.findDatasets(
1323 bias,
1324 collections=["imported_r", "imported_g"],
1325 findFirst=True
1326 )
1327 ), expectedDeduplicatedBiases
1328 )
1329 # Materialize the bias dataset queries, too, so now we're
1330 # materializing every single step.
1331 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1332 findFirst=False).materialize() as biases:
1333 self.assertCountEqual(list(biases), expectedAllBiases)
1334 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1335 findFirst=True).materialize() as biases:
1336 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1338 def testEmptyDimensionsQueries(self):
1339 """Test Query and QueryResults objects in the case where there are no
1340 dimensions.
1341 """
1342 # Set up test data: one dataset type, two runs, one dataset in each.
1343 registry = self.makeRegistry()
1344 self.loadData(registry, "base.yaml")
1345 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1346 registry.registerDatasetType(schema)
1347 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1348 run1 = "run1"
1349 run2 = "run2"
1350 registry.registerRun(run1)
1351 registry.registerRun(run2)
1352 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1353 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1354 # Query directly for both of the datasets, and each one, one at a time.
1355 self.assertCountEqual(
1356 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
1357 [dataset1, dataset2]
1358 )
1359 self.assertEqual(
1360 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
1361 [dataset1],
1362 )
1363 self.assertEqual(
1364 list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
1365 [dataset2],
1366 )
1367 # Query for data IDs with no dimensions.
1368 dataIds = registry.queryDataIds([])
1369 self.assertEqual(
1370 dataIds.toSequence(),
1371 DataCoordinateSequence([dataId], registry.dimensions.empty)
1372 )
1373 # Use queried data IDs to find the datasets.
1374 self.assertCountEqual(
1375 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1376 [dataset1, dataset2],
1377 )
1378 self.assertEqual(
1379 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1380 [dataset1],
1381 )
1382 self.assertEqual(
1383 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1384 [dataset2],
1385 )
1386 # Now materialize the data ID query results and repeat those tests.
1387 with dataIds.materialize() as dataIds:
1388 self.assertEqual(
1389 dataIds.toSequence(),
1390 DataCoordinateSequence([dataId], registry.dimensions.empty)
1391 )
1392 self.assertCountEqual(
1393 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1394 [dataset1, dataset2],
1395 )
1396 self.assertEqual(
1397 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1398 [dataset1],
1399 )
1400 self.assertEqual(
1401 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1402 [dataset2],
1403 )
1404 # Query for non-empty data IDs, then subset that to get the empty one.
1405 # Repeat the above tests starting from that.
1406 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1407 self.assertEqual(
1408 dataIds.toSequence(),
1409 DataCoordinateSequence([dataId], registry.dimensions.empty)
1410 )
1411 self.assertCountEqual(
1412 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1413 [dataset1, dataset2],
1414 )
1415 self.assertEqual(
1416 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1417 [dataset1],
1418 )
1419 self.assertEqual(
1420 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1421 [dataset2],
1422 )
1423 with dataIds.materialize() as dataIds:
1424 self.assertEqual(
1425 dataIds.toSequence(),
1426 DataCoordinateSequence([dataId], registry.dimensions.empty)
1427 )
1428 self.assertCountEqual(
1429 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1430 [dataset1, dataset2],
1431 )
1432 self.assertEqual(
1433 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1434 [dataset1],
1435 )
1436 self.assertEqual(
1437 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1438 [dataset2],
1439 )
1440 # Query for non-empty data IDs, then materialize, then subset to get
1441 # the empty one. Repeat again.
1442 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1443 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1444 self.assertEqual(
1445 dataIds.toSequence(),
1446 DataCoordinateSequence([dataId], registry.dimensions.empty)
1447 )
1448 self.assertCountEqual(
1449 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1450 [dataset1, dataset2],
1451 )
1452 self.assertEqual(
1453 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1454 [dataset1],
1455 )
1456 self.assertEqual(
1457 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1458 [dataset2],
1459 )
1460 with dataIds.materialize() as dataIds:
1461 self.assertEqual(
1462 dataIds.toSequence(),
1463 DataCoordinateSequence([dataId], registry.dimensions.empty)
1464 )
1465 self.assertCountEqual(
1466 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1467 [dataset1, dataset2],
1468 )
1469 self.assertEqual(
1470 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1471 [dataset1],
1472 )
1473 self.assertEqual(
1474 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1475 [dataset2],
1476 )
1478 def testCalibrationCollections(self):
1479 """Test operations on `~CollectionType.CALIBRATION` collections,
1480 including `Registry.certify`, `Registry.decertify`, and
1481 `Registry.findDataset`.
1482 """
1483 # Setup - make a Registry, fill it with some datasets in
1484 # non-calibration collections.
1485 registry = self.makeRegistry()
1486 self.loadData(registry, "base.yaml")
1487 self.loadData(registry, "datasets.yaml")
1488 # Set up some timestamps.
1489 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1490 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1491 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1492 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1493 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1494 allTimespans = [
1495 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1496 ]
1497 # Get references to some datasets.
1498 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1499 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1500 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1501 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1502 # Register the main calibration collection we'll be working with.
1503 collection = "Cam1/calibs/default"
1504 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1505 # Cannot associate into a calibration collection (no timespan).
1506 with self.assertRaises(TypeError):
1507 registry.associate(collection, [bias2a])
1508 # Certify 2a dataset with [t2, t4) validity.
1509 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1510 # We should not be able to certify 2b with anything overlapping that
1511 # window.
1512 with self.assertRaises(ConflictingDefinitionError):
1513 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1514 with self.assertRaises(ConflictingDefinitionError):
1515 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1516 with self.assertRaises(ConflictingDefinitionError):
1517 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1518 with self.assertRaises(ConflictingDefinitionError):
1519 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1520 with self.assertRaises(ConflictingDefinitionError):
1521 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1522 with self.assertRaises(ConflictingDefinitionError):
1523 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1524 with self.assertRaises(ConflictingDefinitionError):
1525 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1526 with self.assertRaises(ConflictingDefinitionError):
1527 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1528 # We should be able to certify 3a with a range overlapping that window,
1529 # because it's for a different detector.
1530 # We'll certify 3a over [t1, t3).
1531 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1532 # Now we'll certify 2b and 3b together over [t4, ∞).
1533 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
1535 # Fetch all associations and check that they are what we expect.
1536 self.assertCountEqual(
1537 list(
1538 registry.queryDatasetAssociations(
1539 "bias",
1540 collections=[collection, "imported_g", "imported_r"],
1541 )
1542 ),
1543 [
1544 DatasetAssociation(
1545 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1546 collection="imported_g",
1547 timespan=None,
1548 ),
1549 DatasetAssociation(
1550 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1551 collection="imported_r",
1552 timespan=None,
1553 ),
1554 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1555 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1556 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1557 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1558 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1559 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1560 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1561 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1562 ]
1563 )
1565 class Ambiguous:
1566 """Tag class to denote lookups that are expected to be ambiguous.
1567 """
1568 pass
1570 def assertLookup(detector: int, timespan: Timespan,
1571 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1572 """Local function that asserts that a bias lookup returns the given
1573 expected result.
1574 """
1575 if expected is Ambiguous:
1576 with self.assertRaises(RuntimeError):
1577 registry.findDataset("bias", collections=collection, instrument="Cam1",
1578 detector=detector, timespan=timespan)
1579 else:
1580 self.assertEqual(
1581 expected,
1582 registry.findDataset("bias", collections=collection, instrument="Cam1",
1583 detector=detector, timespan=timespan)
1584 )
1586 # Systematically test lookups against expected results.
1587 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1588 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1589 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1590 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1591 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1592 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1593 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1594 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1595 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1596 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1597 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1598 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1599 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1600 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1601 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1602 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1603 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1604 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1605 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1606 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1607 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1608 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1609 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1610 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1611 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1612 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1613 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1614 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1615 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1616 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1617 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
1618 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1619 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1620 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1621 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
1622 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1623 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1624 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
1625 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1626 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
1627 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1628 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1630 # Decertify [t3, t5) for all data IDs, and do test lookups again.
1631 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
1632 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
1633 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
1634 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1635 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1636 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1637 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1638 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
1639 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1640 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1641 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1642 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1643 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
1644 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1645 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1646 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1647 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
1648 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1649 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
1650 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
1651 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
1652 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
1653 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1654 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1655 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1656 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1657 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1658 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1659 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
1660 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1661 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1662 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1663 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1664 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
1665 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1666 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1667 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1668 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
1669 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1670 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1671 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
1672 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1673 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
1674 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1675 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1677 # Decertify everything, this time with explicit data IDs, then check
1678 # that no lookups succeed.
1679 registry.decertify(
1680 collection, "bias", Timespan(None, None),
1681 dataIds=[
1682 dict(instrument="Cam1", detector=2),
1683 dict(instrument="Cam1", detector=3),
1684 ]
1685 )
1686 for detector in (2, 3):
1687 for timespan in allTimespans:
1688 assertLookup(detector=detector, timespan=timespan, expected=None)
1689 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
1690 # those.
1691 registry.certify(collection, [bias2a, bias3a], Timespan(None, None),)
1692 for timespan in allTimespans:
1693 assertLookup(detector=2, timespan=timespan, expected=bias2a)
1694 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1695 # Decertify just bias2 over [t2, t4).
1696 # This should split a single certification row into two (and leave the
1697 # other existing row, for bias3a, alone).
1698 registry.decertify(collection, "bias", Timespan(t2, t4),
1699 dataIds=[dict(instrument="Cam1", detector=2)])
1700 for timespan in allTimespans:
1701 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1702 overlapsBefore = timespan.overlaps(Timespan(None, t2))
1703 overlapsAfter = timespan.overlaps(Timespan(t4, None))
1704 if overlapsBefore and overlapsAfter:
1705 expected = Ambiguous
1706 elif overlapsBefore or overlapsAfter:
1707 expected = bias2a
1708 else:
1709 expected = None
1710 assertLookup(detector=2, timespan=timespan, expected=expected)
1712 def testIngestTimeQuery(self):
1714 registry = self.makeRegistry()
1715 self.loadData(registry, "base.yaml")
1716 self.loadData(registry, "datasets.yaml")
1718 datasets = list(registry.queryDatasets(..., collections=...))
1719 len0 = len(datasets)
1720 self.assertGreater(len0, 0)
1722 where = "ingest_date > T'2000-01-01'"
1723 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1724 len1 = len(datasets)
1725 self.assertEqual(len0, len1)
1727 # no one will ever use this piece of software in 30 years
1728 where = "ingest_date > T'2050-01-01'"
1729 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1730 len2 = len(datasets)
1731 self.assertEqual(len2, 0)
1733 def testTimespanQueries(self):
1734 """Test query expressions involving timespans.
1735 """
1736 registry = self.makeRegistry()
1737 self.loadData(registry, "hsc-rc2-subset.yaml")
1738 # All exposures in the database; mapping from ID to timespan.
1739 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
1740 # Just those IDs, sorted (which is also temporal sorting, because HSC
1741 # exposure IDs are monotonically increasing).
1742 ids = sorted(visits.keys())
1743 self.assertGreater(len(ids), 20)
1744 # Pick some quasi-random indexes into `ids` to play with.
1745 i1 = int(len(ids)*0.1)
1746 i2 = int(len(ids)*0.3)
1747 i3 = int(len(ids)*0.6)
1748 i4 = int(len(ids)*0.8)
1749 # Extract some times from those: just before the beginning of i1 (which
1750 # should be after the end of the exposure before), exactly the
1751 # beginning of i2, just after the beginning of i3 (and before its end),
1752 # and the exact end of i4.
1753 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
1754 self.assertGreater(t1, visits[ids[i1 - 1]].end)
1755 t2 = visits[ids[i2]].begin
1756 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
1757 self.assertLess(t3, visits[ids[i3]].end)
1758 t4 = visits[ids[i4]].end
1759 # Make sure those are actually in order.
1760 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
1762 bind = {
1763 "t1": t1,
1764 "t2": t2,
1765 "t3": t3,
1766 "t4": t4,
1767 "ts23": Timespan(t2, t3),
1768 }
1770 def query(where):
1771 """Helper function that queries for visit data IDs and returns
1772 results as a sorted, deduplicated list of visit IDs.
1773 """
1774 return sorted(
1775 {dataId["visit"] for dataId in registry.queryDataIds("visit",
1776 instrument="HSC",
1777 bind=bind,
1778 where=where)}
1779 )
1781 # Try a bunch of timespan queries, mixing up the bounds themselves,
1782 # where they appear in the expression, and how we get the timespan into
1783 # the expression.
1785 # t1 is before the start of i1, so this should not include i1.
1786 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
1787 # t2 is exactly at the start of i2, but ends are exclusive, so these
1788 # should not include i2.
1789 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
1790 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
1791 # t3 is in the middle of i3, so this should include i3.
1792 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
1793 # This one should not include t3 by the same reasoning.
1794 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
1795 # t4 is exactly at the end of i4, so this should include i4.
1796 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
1797 # i4's upper bound of t4 is exclusive so this should not include t4.
1798 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
1800 # Now some timespan vs. time scalar queries.
1801 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
1802 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
1803 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
1804 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
1805 self.assertEqual(ids[i3:i3+1], query("visit.timespan OVERLAPS t3"))
1806 self.assertEqual(ids[i3:i3+1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
1808 # Empty timespans should not overlap anything.
1809 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
1811 def testCollectionSummaries(self):
1812 """Test recording and retrieval of collection summaries.
1813 """
1814 self.maxDiff = None
1815 registry = self.makeRegistry()
1816 # Importing datasets from yaml should go through the code path where
1817 # we update collection summaries as we insert datasets.
1818 self.loadData(registry, "base.yaml")
1819 self.loadData(registry, "datasets.yaml")
1820 flat = registry.getDatasetType("flat")
1821 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
1822 expected1.datasetTypes.add(registry.getDatasetType("bias"))
1823 expected1.datasetTypes.add(flat)
1824 expected1.dimensions.update_extract(
1825 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
1826 )
1827 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1828 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1829 # Create a chained collection with both of the imported runs; the
1830 # summary should be the same, because it's a union with itself.
1831 chain = "chain"
1832 registry.registerCollection(chain, CollectionType.CHAINED)
1833 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
1834 self.assertEqual(registry.getCollectionSummary(chain), expected1)
1835 # Associate flats only into a tagged collection and a calibration
1836 # collection to check summaries of those.
1837 tag = "tag"
1838 registry.registerCollection(tag, CollectionType.TAGGED)
1839 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
1840 calibs = "calibs"
1841 registry.registerCollection(calibs, CollectionType.CALIBRATION)
1842 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
1843 timespan=Timespan(None, None))
1844 expected2 = expected1.copy()
1845 expected2.datasetTypes.discard("bias")
1846 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1847 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1848 # Explicitly calling Registry.refresh() should load those same
1849 # summaries, via a totally different code path.
1850 registry.refresh()
1851 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1852 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1853 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1854 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1856 def testUnrelatedDimensionQueries(self):
1857 """Test that WHERE expressions in queries can reference dimensions that
1858 are not in the result set.
1859 """
1860 registry = self.makeRegistry()
1861 # There is no data to back this query, but it should still return
1862 # zero records instead of raising.
1863 self.assertFalse(
1864 set(registry.queryDataIds(["visit", "detector"],
1865 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
1866 )