# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

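# numpy is optional here: tests that require it (e.g. testNumpyDataId below)
# are skipped via unittest.skipIf when it cannot be imported.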
try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionSummary,
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    OrphanedRecordError,
    Registry,
    RegistryConfig,
)
from .._exceptions import MissingCollectionError
from ..interfaces import ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
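
    A minimal concrete subclass might look like the sketch below (the
    ``sqlite://`` URL and the `Registry.createFromConfig` call are
    assumptions about the backend being tested, not requirements of this
    base class)::

        class SqliteRegistryTests(RegistryTests, unittest.TestCase):

            @classmethod
            def getDataDir(cls) -> str:
                return os.path.join(os.path.dirname(__file__), "data")

            def makeRegistry(self) -> Registry:
                config = self.makeRegistryConfig()
                config["db"] = "sqlite://"  # in-memory SQLite database
                return Registry.createFromConfig(config)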
71 """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a `RegistryConfig` used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
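        # register() creates the dataset types and collections named in the
        # file; load() then inserts the dimension records and datasets.
        # datastore=None because these tests exercise only the Registry, not
        # a Datastore.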
        backend.register()
        backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
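        # Opaque tables are normally used by datastores to record internal
        # bookkeeping; from the Registry's point of view they are just
        # generic row storage (hence "opaque").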
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testRemoveDatasetTypeSuccess(self):
        """Test that Registry.removeDatasetType works when there are no
        datasets of that type present.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        registry.removeDatasetType("flat")
        with self.assertRaises(KeyError):
            registry.getDatasetType("flat")

    def testRemoveDatasetTypeFailure(self):
        """Test that Registry.removeDatasetType raises when there are datasets
        of that type present or if the dataset type is for a component.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        with self.assertRaises(OrphanedRecordError):
            registry.removeDatasetType("flat")
        with self.assertRaises(ValueError):
            registry.removeDatasetType(DatasetType.nameWithComponent("flat", "image"))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that.  So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp".  This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
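        # These two RUN collections were created by the datasets.yaml import
        # above.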
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't itself cause
        # a conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2].
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 in chain2 should find it via tag1,
        # recursing into chain1, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2; it should also be found in
        # chain2, because run2 is at the front of that chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now that chain2
        # is gone.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testCollectionChainFlatten(self):
        """Test that Registry.setCollectionChain obeys its 'flatten' option.
        """
        registry = self.makeRegistry()
        registry.registerCollection("inner", CollectionType.CHAINED)
        registry.registerCollection("innermost", CollectionType.RUN)
        registry.setCollectionChain("inner", ["innermost"])
        registry.registerCollection("outer", CollectionType.CHAINED)
        registry.setCollectionChain("outer", ["inner"], flatten=False)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["inner"])
        registry.setCollectionChain("outer", ["inner"], flatten=True)
        self.assertEqual(list(registry.getCollectionChain("outer")), ["innermost"])

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # Dataset types and collections.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the different collections,
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
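        # The union of the RAW and CALEXP required dimensions is
        # {instrument, exposure, visit, detector}.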
        # Test that a single dim string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with both input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression that excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter: it is not in the dimensions, but it
        # is part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # Dataset types and collections.
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )
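        # The union of the three required-dimension sets is
        # {skymap, tract, patch, band}.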

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # Expression that excludes everything: specifying a non-existent
        # skymap is not a fatal error, just operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)
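        # With the default dimension universe the two families are expected
        # to be the skymap family (tract, patch) and the observation family
        # (visit, visit_detector_region); this is an assumption about the
        # default configuration, not something the test requires by name.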

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        # (Rename the loop variable so it doesn't shadow the dict we iterate.)
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known bands.
        This is tricky because band is backed by a query against
        physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # Check what get() returns for a non-existing key.
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Cannot store an empty key or value.
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # Set the value of a non-existing key.
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # Updating the value of an existing key requires force=True.
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # Delete an existing key.
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # Delete a non-existing key.
        self.assertFalse(attributes.delete("non-attr"))

        # Store a bunch of keys and get the list back.
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
1144 def testQueryResults(self):
1145 """Test querying for data IDs and then manipulating the QueryResults
1146 object returned to perform other queries.
1147 """
1148 registry = self.makeRegistry()
1149 self.loadData(registry, "base.yaml")
1150 self.loadData(registry, "datasets.yaml")
1151 bias = registry.getDatasetType("bias")
1152 flat = registry.getDatasetType("flat")
1153 # Obtain expected results from methods other than those we're testing
1154 # here. That includes:
1155 # - the dimensions of the data IDs we want to query:
1156 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1157 # - the dimensions of some other data IDs we'll extract from that:
1158 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1159 # - the data IDs we expect to obtain from the first queries:
1160 expectedDataIds = DataCoordinateSet(
1161 {
1162 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
1163 universe=registry.dimensions)
1164 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1165 },
1166 graph=expectedGraph,
1167 hasFull=False,
1168 hasRecords=False,
1169 )
1170 # - the flat datasets we expect to find from those data IDs, in just
1171 # one collection (so deduplication is irrelevant):
1172 expectedFlats = [
1173 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
1174 collections="imported_r"),
1175 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
1176 collections="imported_r"),
1177 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
1178 collections="imported_r"),
1179 ]
1180 # - the data IDs we expect to extract from that:
1181 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1182 # - the bias datasets we expect to find from those data IDs, after we
1183 # subset-out the physical_filter dimension, both with duplicates:
1184 expectedAllBiases = [
1185 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1186 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1187 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1188 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1189 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1190 ]
1191 # - ...and without duplicates:
1192 expectedDeduplicatedBiases = [
1193 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1194 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1195 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1196 ]
1197 # Test against those expected results, using a "lazy" query for the
1198 # data IDs (which re-executes that query each time we use it to do
1199 # something new).
1200 dataIds = registry.queryDataIds(
1201 ["detector", "physical_filter"],
1202 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1203 instrument="Cam1",
1204 )
1205 self.assertEqual(dataIds.graph, expectedGraph)
1206 self.assertEqual(dataIds.toSet(), expectedDataIds)
1207 self.assertCountEqual(
1208 list(
1209 dataIds.findDatasets(
1210 flat,
1211 collections=["imported_r"],
1212 )
1213 ),
1214 expectedFlats,
1215 )
1216 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1217 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1218 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1219 self.assertCountEqual(
1220 list(
1221 subsetDataIds.findDatasets(
1222 bias,
1223 collections=["imported_r", "imported_g"],
1224 findFirst=False
1225 )
1226 ),
1227 expectedAllBiases
1228 )
1229 self.assertCountEqual(
1230 list(
1231 subsetDataIds.findDatasets(
1232 bias,
1233 collections=["imported_r", "imported_g"],
1234 findFirst=True
1235 )
1236 ), expectedDeduplicatedBiases
1237 )
1238 # Materialize the bias dataset queries (only) by putting the results
1239 # into temporary tables, then repeat those tests.
1240 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1241 findFirst=False).materialize() as biases:
1242 self.assertCountEqual(list(biases), expectedAllBiases)
1243 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1244 findFirst=True).materialize() as biases:
1245 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1246 # Materialize the data ID subset query, but not the dataset queries.
1247 with subsetDataIds.materialize() as subsetDataIds:
1248 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1249 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1250 self.assertCountEqual(
1251 list(
1252 subsetDataIds.findDatasets(
1253 bias,
1254 collections=["imported_r", "imported_g"],
1255 findFirst=False
1256 )
1257 ),
1258 expectedAllBiases
1259 )
1260 self.assertCountEqual(
1261 list(
1262 subsetDataIds.findDatasets(
1263 bias,
1264 collections=["imported_r", "imported_g"],
1265 findFirst=True
1266 )
1267 ), expectedDeduplicatedBiases
1268 )
1269 # Materialize the dataset queries, too.
1270 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1271 findFirst=False).materialize() as biases:
1272 self.assertCountEqual(list(biases), expectedAllBiases)
1273 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1274 findFirst=True).materialize() as biases:
1275 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1276 # Materialize the original query, but none of the follow-up queries.
1277 with dataIds.materialize() as dataIds:
1278 self.assertEqual(dataIds.graph, expectedGraph)
1279 self.assertEqual(dataIds.toSet(), expectedDataIds)
1280 self.assertCountEqual(
1281 list(
1282 dataIds.findDatasets(
1283 flat,
1284 collections=["imported_r"],
1285 )
1286 ),
1287 expectedFlats,
1288 )
1289 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1290 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1291 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1292 self.assertCountEqual(
1293 list(
1294 subsetDataIds.findDatasets(
1295 bias,
1296 collections=["imported_r", "imported_g"],
1297 findFirst=False
1298 )
1299 ),
1300 expectedAllBiases
1301 )
1302 self.assertCountEqual(
1303 list(
1304 subsetDataIds.findDatasets(
1305 bias,
1306 collections=["imported_r", "imported_g"],
1307 findFirst=True
1308 )
1309 ), expectedDeduplicatedBiases
1310 )
1311 # Materialize just the bias dataset queries.
1312 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1313 findFirst=False).materialize() as biases:
1314 self.assertCountEqual(list(biases), expectedAllBiases)
1315 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1316 findFirst=True).materialize() as biases:
1317 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1318 # Materialize the subset data ID query, but not the dataset
1319 # queries.
1320 with subsetDataIds.materialize() as subsetDataIds:
1321 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1322 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1323 self.assertCountEqual(
1324 list(
1325 subsetDataIds.findDatasets(
1326 bias,
1327 collections=["imported_r", "imported_g"],
1328 findFirst=False
1329 )
1330 ),
1331 expectedAllBiases
1332 )
1333 self.assertCountEqual(
1334 list(
1335 subsetDataIds.findDatasets(
1336 bias,
1337 collections=["imported_r", "imported_g"],
1338 findFirst=True
1339 )
1340 ), expectedDeduplicatedBiases
1341 )
1342 # Materialize the bias dataset queries, too, so now we're
1343 # materializing every single step.
1344 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1345 findFirst=False).materialize() as biases:
1346 self.assertCountEqual(list(biases), expectedAllBiases)
1347 with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
1348 findFirst=True).materialize() as biases:
1349 self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
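# A minimal sketch of the chained-query pattern exercised above, as a
# caller might use it with this test's data:
#
#     flat = registry.getDatasetType("flat")
#     dataIds = registry.queryDataIds(["detector", "physical_filter"], instrument="Cam1")
#     with dataIds.materialize() as ids:
#         refs = list(ids.findDatasets(flat, collections=["imported_r"]))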
1351 def testEmptyDimensionsQueries(self):
1352 """Test Query and QueryResults objects in the case where there are no
1353 dimensions.
1354 """
1355 # Set up test data: one dataset type, two runs, one dataset in each.
1356 registry = self.makeRegistry()
1357 self.loadData(registry, "base.yaml")
1358 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1359 registry.registerDatasetType(schema)
1360 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1361 run1 = "run1"
1362 run2 = "run2"
1363 registry.registerRun(run1)
1364 registry.registerRun(run2)
1365 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1366 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1367 # Query directly for both datasets at once, then for each one alone.
1368 self.assertCountEqual(
1369 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
1370 [dataset1, dataset2]
1371 )
1372 self.assertEqual(
1373 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
1374 [dataset1],
1375 )
1376 self.assertEqual(
1377 list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
1378 [dataset2],
1379 )
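# (findFirst=True searches the given collections in order and returns
# only the first match for each data ID, which is why reversing the
# collection order flips the result from dataset1 to dataset2.)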
1380 # Query for data IDs with no dimensions.
1381 dataIds = registry.queryDataIds([])
1382 self.assertEqual(
1383 dataIds.toSequence(),
1384 DataCoordinateSequence([dataId], registry.dimensions.empty)
1385 )
1386 # Use queried data IDs to find the datasets.
1387 self.assertCountEqual(
1388 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1389 [dataset1, dataset2],
1390 )
1391 self.assertEqual(
1392 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1393 [dataset1],
1394 )
1395 self.assertEqual(
1396 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1397 [dataset2],
1398 )
1399 # Now materialize the data ID query results and repeat those tests.
1400 with dataIds.materialize() as dataIds:
1401 self.assertEqual(
1402 dataIds.toSequence(),
1403 DataCoordinateSequence([dataId], registry.dimensions.empty)
1404 )
1405 self.assertCountEqual(
1406 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1407 [dataset1, dataset2],
1408 )
1409 self.assertEqual(
1410 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1411 [dataset1],
1412 )
1413 self.assertEqual(
1414 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1415 [dataset2],
1416 )
1417 # Query for non-empty data IDs, then subset that to get the empty one.
1418 # Repeat the above tests starting from that.
1419 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1420 self.assertEqual(
1421 dataIds.toSequence(),
1422 DataCoordinateSequence([dataId], registry.dimensions.empty)
1423 )
1424 self.assertCountEqual(
1425 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1426 [dataset1, dataset2],
1427 )
1428 self.assertEqual(
1429 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1430 [dataset1],
1431 )
1432 self.assertEqual(
1433 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1434 [dataset2],
1435 )
1436 with dataIds.materialize() as dataIds:
1437 self.assertEqual(
1438 dataIds.toSequence(),
1439 DataCoordinateSequence([dataId], registry.dimensions.empty)
1440 )
1441 self.assertCountEqual(
1442 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1443 [dataset1, dataset2],
1444 )
1445 self.assertEqual(
1446 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1447 [dataset1],
1448 )
1449 self.assertEqual(
1450 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1451 [dataset2],
1452 )
1453 # Query for non-empty data IDs, then materialize, then subset to get
1454 # the empty one. Repeat again.
1455 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1456 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1457 self.assertEqual(
1458 dataIds.toSequence(),
1459 DataCoordinateSequence([dataId], registry.dimensions.empty)
1460 )
1461 self.assertCountEqual(
1462 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1463 [dataset1, dataset2],
1464 )
1465 self.assertEqual(
1466 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1467 [dataset1],
1468 )
1469 self.assertEqual(
1470 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1471 [dataset2],
1472 )
1473 with dataIds.materialize() as dataIds:
1474 self.assertEqual(
1475 dataIds.toSequence(),
1476 DataCoordinateSequence([dataId], registry.dimensions.empty)
1477 )
1478 self.assertCountEqual(
1479 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1480 [dataset1, dataset2],
1481 )
1482 self.assertEqual(
1483 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1484 [dataset1],
1485 )
1486 self.assertEqual(
1487 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1488 [dataset2],
1489 )
1491 def testCalibrationCollections(self):
1492 """Test operations on `~CollectionType.CALIBRATION` collections,
1493 including `Registry.certify`, `Registry.decertify`, and
1494 `Registry.findDataset`.
1495 """
1496 # Set up: make a Registry and fill it with some datasets in
1497 # non-calibration collections.
1498 registry = self.makeRegistry()
1499 self.loadData(registry, "base.yaml")
1500 self.loadData(registry, "datasets.yaml")
1501 # Set up some timestamps.
1502 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1503 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1504 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1505 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1506 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1507 allTimespans = [
1508 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1509 ]
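# The two None sentinels make combinations() emit half-unbounded and
# fully unbounded spans as well; seven endpoints taken two at a time
# give 21 candidate timespans in all.
self.assertEqual(len(allTimespans), 21)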
1510 # Get references to some datasets.
1511 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1512 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1513 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1514 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1515 # Register the main calibration collection we'll be working with.
1516 collection = "Cam1/calibs/default"
1517 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1518 # Cannot associate into a calibration collection (no timespan).
1519 with self.assertRaises(TypeError):
1520 registry.associate(collection, [bias2a])
1521 # Certify the 2a dataset with [t2, t4) validity.
1522 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1523 # We should not be able to certify 2b with anything overlapping that
1524 # window.
1525 with self.assertRaises(ConflictingDefinitionError):
1526 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1527 with self.assertRaises(ConflictingDefinitionError):
1528 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1529 with self.assertRaises(ConflictingDefinitionError):
1530 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1531 with self.assertRaises(ConflictingDefinitionError):
1532 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1533 with self.assertRaises(ConflictingDefinitionError):
1534 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1535 with self.assertRaises(ConflictingDefinitionError):
1536 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1537 with self.assertRaises(ConflictingDefinitionError):
1538 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1539 with self.assertRaises(ConflictingDefinitionError):
1540 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1541 # We should be able to certify 3a with a range overlapping that window,
1542 # because it's for a different detector.
1543 # We'll certify 3a over [t1, t3).
1544 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1545 # Now we'll certify 2b and 3b together over [t4, ∞).
1546 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
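# At this point the validity ranges in the collection are:
#   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
#   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)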
1548 # Fetch all associations and check that they are what we expect.
1549 self.assertCountEqual(
1550 list(
1551 registry.queryDatasetAssociations(
1552 "bias",
1553 collections=[collection, "imported_g", "imported_r"],
1554 )
1555 ),
1556 [
1557 DatasetAssociation(
1558 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1559 collection="imported_g",
1560 timespan=None,
1561 ),
1562 DatasetAssociation(
1563 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1564 collection="imported_r",
1565 timespan=None,
1566 ),
1567 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1568 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1569 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1570 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1571 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1572 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1573 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1574 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1575 ]
1576 )
1578 class Ambiguous:
1579 """Tag class to denote lookups that are expected to be ambiguous.
1580 """
1581 pass
1583 def assertLookup(detector: int, timespan: Timespan,
1584 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1585 """Assert that a bias lookup in the calibration collection returns
1586 the given expected result.
1587 """
1588 if expected is Ambiguous:
1589 with self.assertRaises(RuntimeError):
1590 registry.findDataset("bias", collections=collection, instrument="Cam1",
1591 detector=detector, timespan=timespan)
1592 else:
1593 self.assertEqual(
1594 expected,
1595 registry.findDataset("bias", collections=collection, instrument="Cam1",
1596 detector=detector, timespan=timespan)
1597 )
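# (In a CALIBRATION collection, findDataset with a timespan returns the
# one dataset whose validity range overlaps that timespan, None when
# nothing overlaps, and raises RuntimeError when more than one does.)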
1599 # Systematically test lookups against expected results.
1600 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1601 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1602 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1603 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1604 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1605 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1606 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1607 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1608 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1609 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1610 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1611 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1612 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1613 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1614 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1615 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1616 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1617 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1618 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1619 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1620 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1621 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1622 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1623 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1624 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1625 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1626 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1627 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1628 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1629 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1630 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
1631 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1632 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1633 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1634 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
1635 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1636 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1637 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
1638 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1639 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
1640 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1641 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1643 # Decertify [t3, t5) for all data IDs, and run the test lookups again.
1644 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
1645 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
1646 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
1647 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1648 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1649 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1650 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1651 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
1652 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1653 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1654 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1655 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1656 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
1657 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1658 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1659 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1660 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
1661 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1662 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
1663 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
1664 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
1665 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
1666 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1667 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1668 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1669 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1670 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1671 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1672 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
1673 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1674 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1675 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1676 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1677 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
1678 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1679 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1680 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1681 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
1682 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1683 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1684 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
1685 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1686 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
1687 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1688 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1690 # Decertify everything, this time with explicit data IDs, then check
1691 # that no lookups succeed.
1692 registry.decertify(
1693 collection, "bias", Timespan(None, None),
1694 dataIds=[
1695 dict(instrument="Cam1", detector=2),
1696 dict(instrument="Cam1", detector=3),
1697 ]
1698 )
1699 for detector in (2, 3):
1700 for timespan in allTimespans:
1701 assertLookup(detector=detector, timespan=timespan, expected=None)
1702 # Certify bias2a and bias3a over (-∞, ∞), and check that all lookups
1703 # return those.
1704 registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
1705 for timespan in allTimespans:
1706 assertLookup(detector=2, timespan=timespan, expected=bias2a)
1707 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1708 # Decertify just detector 2's bias (bias2a) over [t2, t4).
1709 # This should split a single certification row into two (and leave the
1710 # other existing row, for bias3a, alone).
1711 registry.decertify(collection, "bias", Timespan(t2, t4),
1712 dataIds=[dict(instrument="Cam1", detector=2)])
1713 for timespan in allTimespans:
1714 assertLookup(detector=3, timespan=timespan, expected=bias3a)
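# After that decertification, bias2a's validity is split into
# (-∞, t2) and [t4, ∞): a query timespan touching both pieces is
# ambiguous, one touching exactly one piece finds bias2a, and one
# falling entirely inside [t2, t4) finds nothing.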
1715 overlapsBefore = timespan.overlaps(Timespan(None, t2))
1716 overlapsAfter = timespan.overlaps(Timespan(t4, None))
1717 if overlapsBefore and overlapsAfter:
1718 expected = Ambiguous
1719 elif overlapsBefore or overlapsAfter:
1720 expected = bias2a
1721 else:
1722 expected = None
1723 assertLookup(detector=2, timespan=timespan, expected=expected)
1725 def testIngestTimeQuery(self):
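"""Test that ``where`` expressions can constrain queries on the
``ingest_date`` of datasets.
"""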
1727 registry = self.makeRegistry()
1728 self.loadData(registry, "base.yaml")
1729 self.loadData(registry, "datasets.yaml")
1731 datasets = list(registry.queryDatasets(..., collections=...))
1732 len0 = len(datasets)
1733 self.assertGreater(len0, 0)
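# T'...' is the query expression language's literal syntax for a time;
# everything in the test data was ingested after 2000-01-01, so this
# cutoff should keep all datasets.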
1735 where = "ingest_date > T'2000-01-01'"
1736 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1737 len1 = len(datasets)
1738 self.assertEqual(len0, len1)
1740 # no one will ever use this piece of software in 30 years
1741 where = "ingest_date > T'2050-01-01'"
1742 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1743 len2 = len(datasets)
1744 self.assertEqual(len2, 0)
1746 def testTimespanQueries(self):
1747 """Test query expressions involving timespans.
1748 """
1749 registry = self.makeRegistry()
1750 self.loadData(registry, "hsc-rc2-subset.yaml")
1751 # All visits in the database; mapping from visit ID to timespan.
1752 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
1753 # Just those IDs, sorted (which is also temporal sorting, because HSC
1754 # visit IDs are monotonically increasing).
1755 ids = sorted(visits.keys())
1756 self.assertGreater(len(ids), 20)
1757 # Pick some quasi-random indexes into `ids` to play with.
1758 i1 = int(len(ids)*0.1)
1759 i2 = int(len(ids)*0.3)
1760 i3 = int(len(ids)*0.6)
1761 i4 = int(len(ids)*0.8)
1762 # Extract some times from those: just before the beginning of i1 (which
1763 # should be after the end of the visit before), exactly the
1764 # beginning of i2, just after the beginning of i3 (and before its end),
1765 # and the exact end of i4.
1766 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
1767 self.assertGreater(t1, visits[ids[i1 - 1]].end)
1768 t2 = visits[ids[i2]].begin
1769 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
1770 self.assertLess(t3, visits[ids[i3]].end)
1771 t4 = visits[ids[i4]].end
1772 # Make sure those are actually in order.
1773 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
1775 bind = {
1776 "t1": t1,
1777 "t2": t2,
1778 "t3": t3,
1779 "t4": t4,
1780 "ts23": Timespan(t2, t3),
1781 }
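# ``bind`` maps names usable in ``where`` expressions to Python values
# (astropy times and a Timespan here), so the expressions below can
# reference t1..t4 and ts23 without embedding literals in the strings.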
1783 def query(where):
1784 """Helper function that queries for visit data IDs and returns
1785 results as a sorted, deduplicated list of visit IDs.
1786 """
1787 return sorted(
1788 {dataId["visit"] for dataId in registry.queryDataIds("visit",
1789 instrument="HSC",
1790 bind=bind,
1791 where=where)}
1792 )
1794 # Try a bunch of timespan queries, mixing up the bounds themselves,
1795 # where they appear in the expression, and how we get the timespan into
1796 # the expression.
1798 # t1 is before the start of i1, so this should not include i1.
1799 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
1800 # t2 is exactly at the start of i2, but ends are exclusive, so these
1801 # should not include i2.
1802 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
1803 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
1804 # t3 is in the middle of i3, so this should include i3.
1805 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
1806 # This one should not include i3, by the same reasoning.
1807 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
1808 # t4 is exactly at i4's (exclusive) end, but the overlap occurs earlier, so this should include i4.
1809 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
1810 # i4's upper bound of t4 is exclusive, so this should not include i4.
1811 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
1813 # Now some timespan vs. time scalar queries.
1814 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
1815 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
1816 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
1817 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
1818 self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
1819 self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
1821 # Empty timespans should not overlap anything.
1822 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
1824 def testCollectionSummaries(self):
1825 """Test recording and retrieval of collection summaries.
1826 """
1827 self.maxDiff = None
1828 registry = self.makeRegistry()
1829 # Importing datasets from YAML should go through the code path where
1830 # we update collection summaries as we insert datasets.
1831 self.loadData(registry, "base.yaml")
1832 self.loadData(registry, "datasets.yaml")
1833 flat = registry.getDatasetType("flat")
1834 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
1835 expected1.datasetTypes.add(registry.getDatasetType("bias"))
1836 expected1.datasetTypes.add(flat)
1837 expected1.dimensions.update_extract(
1838 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
1839 )
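# A CollectionSummary records which dataset types a collection contains
# and which governor dimension values (here just instrument='Cam1')
# appear in their data IDs.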
1840 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1841 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1842 # Create a chained collection with both of the imported runs; the
1843 # summary should be the same, because it is a union of two identical summaries.
1844 chain = "chain"
1845 registry.registerCollection(chain, CollectionType.CHAINED)
1846 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
1847 self.assertEqual(registry.getCollectionSummary(chain), expected1)
1848 # Associate flats only into a tagged collection and a calibration
1849 # collection to check summaries of those.
1850 tag = "tag"
1851 registry.registerCollection(tag, CollectionType.TAGGED)
1852 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
1853 calibs = "calibs"
1854 registry.registerCollection(calibs, CollectionType.CALIBRATION)
1855 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
1856 timespan=Timespan(None, None))
1857 expected2 = expected1.copy()
1858 expected2.datasetTypes.discard("bias")
1859 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1860 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1861 # Explicitly calling Registry.refresh() should load those same
1862 # summaries, via a totally different code path.
1863 registry.refresh()
1864 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1865 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1866 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1867 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1869 def testUnrelatedDimensionQueries(self):
1870 """Test that WHERE expressions in queries can reference dimensions that
1871 are not in the result set.
1872 """
1873 registry = self.makeRegistry()
1874 # There is no data to back this query, but it should still return
1875 # zero records instead of raising.
1876 self.assertFalse(
1877 set(registry.queryDataIds(["visit", "detector"],
1878 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
1879 )
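# (The skymap and tract constraints join in dimensions unrelated to the
# requested visit+detector results; since no skymap named 'not_here'
# exists, the joined query is simply empty rather than an error.)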