Coverage for python/lsst/daf/butler/registry/tests/_registry.py: 5%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union
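
# numpy is an optional test dependency: the tests that exercise numpy integer
# data IDs (``testNumpyDataId`` below) are skipped via ``unittest.skipIf``
# when it is not available.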
try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionSummary,
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from .._exceptions import MissingCollectionError
from ..interfaces import ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class. If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
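
    # A minimal sketch of how a concrete subclass ties these hooks together.
    # It is illustrative only: the class name, the in-memory SQLite ``db``
    # URI, and the data-directory layout are assumptions, not part of this
    # file.
    #
    #     class SqliteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             # Assumed location of the YAML test data files.
    #             return os.path.join(os.path.dirname(__file__), "data", "registry")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # assumed in-memory SQLite URI
    #             return Registry.fromConfig(config, create=True)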

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the given
        keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next line
        # fails (i.e. "temporary.data" _is_ in everything.names), it means
        # this part of the test isn't doing anything, because the _unregister
        # call above isn't simulating the real-life case we want it to
        # simulate, in which different versions of daf_butler in entirely
        # different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Search for bias with dataId1; it should be found via tag1 in
        # chain2, recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. It should also be found in
        # chain2, via run2 at the front of the chain.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not among the requested dimensions,
        # but it is part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existent
        # skymap is not a fatal error, just operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        # (Use a distinct loop-variable name so the outer ``regions`` dict is
        # not shadowed.)
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager, plus config for dimensions.
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._managers.attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1315 def testEmptyDimensionsQueries(self):
1316 """Test Query and QueryResults objects in the case where there are no
1317 dimensions.
1318 """
1319 # Set up test data: one dataset type, two runs, one dataset in each.
1320 registry = self.makeRegistry()
1321 self.loadData(registry, "base.yaml")
1322 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1323 registry.registerDatasetType(schema)
1324 dataId = DataCoordinate.makeEmpty(registry.dimensions)
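# With no dimensions there is exactly one (empty) data ID, so the two
# datasets inserted below are distinguished only by the run they live in.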
1325 run1 = "run1"
1326 run2 = "run2"
1327 registry.registerRun(run1)
1328 registry.registerRun(run2)
1329 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1330 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
1331 # Query for both datasets at once, then for each one individually by varying the findFirst collection order.
1332 self.assertCountEqual(
1333 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
1334 [dataset1, dataset2]
1335 )
1336 self.assertEqual(
1337 list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
1338 [dataset1],
1339 )
1340 self.assertEqual(
1341 list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
1342 [dataset2],
1343 )
1344 # Query for data IDs with no dimensions.
1345 dataIds = registry.queryDataIds([])
1346 self.assertEqual(
1347 dataIds.toSequence(),
1348 DataCoordinateSequence([dataId], registry.dimensions.empty)
1349 )
1350 # Use queried data IDs to find the datasets.
1351 self.assertCountEqual(
1352 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1353 [dataset1, dataset2],
1354 )
1355 self.assertEqual(
1356 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1357 [dataset1],
1358 )
1359 self.assertEqual(
1360 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1361 [dataset2],
1362 )
1363 # Now materialize the data ID query results and repeat those tests.
1364 with dataIds.materialize() as dataIds:
1365 self.assertEqual(
1366 dataIds.toSequence(),
1367 DataCoordinateSequence([dataId], registry.dimensions.empty)
1368 )
1369 self.assertCountEqual(
1370 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1371 [dataset1, dataset2],
1372 )
1373 self.assertEqual(
1374 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1375 [dataset1],
1376 )
1377 self.assertEqual(
1378 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1379 [dataset2],
1380 )
1381 # Query for non-empty data IDs, then subset that to get the empty one.
1382 # Repeat the above tests starting from that.
1383 dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
1384 self.assertEqual(
1385 dataIds.toSequence(),
1386 DataCoordinateSequence([dataId], registry.dimensions.empty)
1387 )
1388 self.assertCountEqual(
1389 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1390 [dataset1, dataset2],
1391 )
1392 self.assertEqual(
1393 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1394 [dataset1],
1395 )
1396 self.assertEqual(
1397 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1398 [dataset2],
1399 )
1400 with dataIds.materialize() as dataIds:
1401 self.assertEqual(
1402 dataIds.toSequence(),
1403 DataCoordinateSequence([dataId], registry.dimensions.empty)
1404 )
1405 self.assertCountEqual(
1406 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1407 [dataset1, dataset2],
1408 )
1409 self.assertEqual(
1410 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1411 [dataset1],
1412 )
1413 self.assertEqual(
1414 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1415 [dataset2],
1416 )
1417 # Query for non-empty data IDs, then materialize, then subset to get
1418 # the empty one. Repeat again.
1419 with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
1420 dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
1421 self.assertEqual(
1422 dataIds.toSequence(),
1423 DataCoordinateSequence([dataId], registry.dimensions.empty)
1424 )
1425 self.assertCountEqual(
1426 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1427 [dataset1, dataset2],
1428 )
1429 self.assertEqual(
1430 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1431 [dataset1],
1432 )
1433 self.assertEqual(
1434 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1435 [dataset2],
1436 )
1437 with dataIds.materialize() as dataIds:
1438 self.assertEqual(
1439 dataIds.toSequence(),
1440 DataCoordinateSequence([dataId], registry.dimensions.empty)
1441 )
1442 self.assertCountEqual(
1443 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
1444 [dataset1, dataset2],
1445 )
1446 self.assertEqual(
1447 list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
1448 [dataset1],
1449 )
1450 self.assertEqual(
1451 list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
1452 [dataset2],
1453 )
1455 def testCalibrationCollections(self):
1456 """Test operations on `~CollectionType.CALIBRATION` collections,
1457 including `Registry.certify`, `Registry.decertify`, and
1458 `Registry.findDataset`.
1459 """
1460 # Setup: make a Registry and fill it with some datasets in
1461 # non-calibration collections.
1462 registry = self.makeRegistry()
1463 self.loadData(registry, "base.yaml")
1464 self.loadData(registry, "datasets.yaml")
1465 # Set up some timestamps.
1466 t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
1467 t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
1468 t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
1469 t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
1470 t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
1471 allTimespans = [
1472 Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
1473 ]
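# (Note that None appears twice in the list above, so the pairwise
# combinations include half-unbounded spans like Timespan(None, t1) and
# Timespan(t5, None) as well as the fully unbounded Timespan(None, None).)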
1474 # Get references to some datasets.
1475 bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
1476 bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
1477 bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
1478 bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
1479 # Register the main calibration collection we'll be working with.
1480 collection = "Cam1/calibs/default"
1481 registry.registerCollection(collection, type=CollectionType.CALIBRATION)
1482 # Cannot associate into a calibration collection (no timespan).
1483 with self.assertRaises(TypeError):
1484 registry.associate(collection, [bias2a])
1485 # Certify 2a dataset with [t2, t4) validity.
1486 registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
1487 # We should not be able to certify 2b with anything overlapping that
1488 # window.
1489 with self.assertRaises(ConflictingDefinitionError):
1490 registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
1491 with self.assertRaises(ConflictingDefinitionError):
1492 registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
1493 with self.assertRaises(ConflictingDefinitionError):
1494 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
1495 with self.assertRaises(ConflictingDefinitionError):
1496 registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
1497 with self.assertRaises(ConflictingDefinitionError):
1498 registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
1499 with self.assertRaises(ConflictingDefinitionError):
1500 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
1501 with self.assertRaises(ConflictingDefinitionError):
1502 registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
1503 with self.assertRaises(ConflictingDefinitionError):
1504 registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
1505 # We should be able to certify 3a with a range overlapping that window,
1506 # because it's for a different detector.
1507 # We'll certify 3a over [t1, t3).
1508 registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
1509 # Now we'll certify 2b and 3b together over [t4, ∞).
1510 registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
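# The collection's contents are now (all intervals half-open):
#   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
#   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)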
1512 # Fetch all associations and check that they are what we expect.
1513 self.assertCountEqual(
1514 list(
1515 registry.queryDatasetAssociations(
1516 "bias",
1517 collections=[collection, "imported_g", "imported_r"],
1518 )
1519 ),
1520 [
1521 DatasetAssociation(
1522 ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
1523 collection="imported_g",
1524 timespan=None,
1525 ),
1526 DatasetAssociation(
1527 ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
1528 collection="imported_r",
1529 timespan=None,
1530 ),
1531 DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
1532 DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
1533 DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
1534 DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
1535 DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
1536 DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
1537 DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1538 DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
1539 ]
1540 )
1542 class Ambiguous:
1543 """Tag class to denote lookups that are expected to be ambiguous.
1544 """
1547 def assertLookup(detector: int, timespan: Timespan,
1548 expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
1549 """Local function that asserts that a bias lookup returns the given
1550 expected result.
1551 """
1552 if expected is Ambiguous:
1553 with self.assertRaises(RuntimeError):
1554 registry.findDataset("bias", collections=collection, instrument="Cam1",
1555 detector=detector, timespan=timespan)
1556 else:
1557 self.assertEqual(
1558 expected,
1559 registry.findDataset("bias", collections=collection, instrument="Cam1",
1560 detector=detector, timespan=timespan)
1561 )
1563 # Systematically test lookups against expected results.
1564 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1565 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1566 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1567 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1568 assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
1569 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1570 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1571 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1572 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1573 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
1574 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1575 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1576 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1577 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
1578 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1579 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
1580 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
1581 assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
1582 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
1583 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1584 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1585 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1586 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1587 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1588 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1589 assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
1590 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1591 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1592 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1593 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1594 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
1595 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1596 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1597 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1598 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
1599 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1600 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1601 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
1602 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1603 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
1604 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1605 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1607 # Decertify [t3, t5) for all data IDs, and do test lookups again.
1608 # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
1609 # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
1610 registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
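# The collection's contents are now:
#   detector 2: bias2a over [t2, t3), bias2b over [t5, ∞)
#   detector 3: bias3a over [t1, t3), bias3b over [t5, ∞)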
1611 assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
1612 assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
1613 assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
1614 assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
1615 assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
1616 assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
1617 assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
1618 assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
1619 assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
1620 assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
1621 assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
1622 assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
1623 assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
1624 assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
1625 assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
1626 assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
1627 assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
1628 assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
1629 assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
1630 assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
1631 assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
1632 assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
1633 assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
1634 assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
1635 assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
1636 assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
1637 assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
1638 assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
1639 assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
1640 assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
1641 assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
1642 assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
1643 assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
1644 assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
1645 assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
1646 assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
1647 assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
1648 assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
1649 assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
1650 assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
1651 assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
1652 assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)
1654 # Decertify everything, this time with explicit data IDs, then check
1655 # that no lookups succeed.
1656 registry.decertify(
1657 collection, "bias", Timespan(None, None),
1658 dataIds=[
1659 dict(instrument="Cam1", detector=2),
1660 dict(instrument="Cam1", detector=3),
1661 ]
1662 )
1663 for detector in (2, 3):
1664 for timespan in allTimespans:
1665 assertLookup(detector=detector, timespan=timespan, expected=None)
1666 # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
1667 # those.
1668 registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
1669 for timespan in allTimespans:
1670 assertLookup(detector=2, timespan=timespan, expected=bias2a)
1671 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1672 # Decertify just bias2a (via its detector=2 data ID) over [t2, t4).
1673 # This should split a single certification row into two (and leave the
1674 # other existing row, for bias3a, alone).
1675 registry.decertify(collection, "bias", Timespan(t2, t4),
1676 dataIds=[dict(instrument="Cam1", detector=2)])
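# Detector 2's single (-∞, ∞) row becomes (-∞, t2) and [t4, ∞), both
# still pointing at bias2a; detector 3's row is untouched.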
1677 for timespan in allTimespans:
1678 assertLookup(detector=3, timespan=timespan, expected=bias3a)
1679 overlapsBefore = timespan.overlaps(Timespan(None, t2))
1680 overlapsAfter = timespan.overlaps(Timespan(t4, None))
1681 if overlapsBefore and overlapsAfter:
1682 expected = Ambiguous
1683 elif overlapsBefore or overlapsAfter:
1684 expected = bias2a
1685 else:
1686 expected = None
1687 assertLookup(detector=2, timespan=timespan, expected=expected)
1689 def testIngestTimeQuery(self):
"""Test that string WHERE expressions can constrain queries on the
dataset ``ingest_date`` column.
"""
1691 registry = self.makeRegistry()
1692 self.loadData(registry, "base.yaml")
1693 self.loadData(registry, "datasets.yaml")
1695 datasets = list(registry.queryDatasets(..., collections=...))
1696 len0 = len(datasets)
1697 self.assertGreater(len0, 0)
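# All of the test datasets were ingested well after 2000-01-01, so a
# lower bound in the past should match everything.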
1699 where = "ingest_date > T'2000-01-01'"
1700 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1701 len1 = len(datasets)
1702 self.assertEqual(len0, len1)
1704 # A cutoff in the far future should match nothing (no one will ever use this piece of software in 30 years).
1705 where = "ingest_date > T'2050-01-01'"
1706 datasets = list(registry.queryDatasets(..., collections=..., where=where))
1707 len2 = len(datasets)
1708 self.assertEqual(len2, 0)
1710 def testTimespanQueries(self):
1711 """Test query expressions involving timespans.
1712 """
1713 registry = self.makeRegistry()
1714 self.loadData(registry, "hsc-rc2-subset.yaml")
1715 # All visits in the database; mapping from visit ID to timespan.
1716 visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
1717 # Just those IDs, sorted (which is also temporal sorting, because HSC
1718 # visit IDs increase monotonically with time).
1719 ids = sorted(visits.keys())
1720 self.assertGreater(len(ids), 20)
1721 # Pick some quasi-random indexes into `ids` to play with.
1722 i1 = int(len(ids)*0.1)
1723 i2 = int(len(ids)*0.3)
1724 i3 = int(len(ids)*0.6)
1725 i4 = int(len(ids)*0.8)
1726 # Extract some times from those: just before the beginning of i1 (which
1727 # should be after the end of the previous visit), exactly the
1728 # beginning of i2, just after the beginning of i3 (and before its end),
1729 # and the exact end of i4.
1730 t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
1731 self.assertGreater(t1, visits[ids[i1 - 1]].end)
1732 t2 = visits[ids[i2]].begin
1733 t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
1734 self.assertLess(t3, visits[ids[i3]].end)
1735 t4 = visits[ids[i4]].end
1736 # Make sure those are actually in order.
1737 self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
1739 bind = {
1740 "t1": t1,
1741 "t2": t2,
1742 "t3": t3,
1743 "t4": t4,
1744 "ts23": Timespan(t2, t3),
1745 }
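# The bind mapping lets the WHERE expressions below refer to these values
# by name (t1 ... t4, ts23) rather than embedding literal timestamps.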
1747 def query(where):
1748 """Helper function that queries for visit data IDs and returns
1749 results as a sorted, deduplicated list of visit IDs.
1750 """
1751 return sorted(
1752 {dataId["visit"] for dataId in registry.queryDataIds("visit",
1753 instrument="HSC",
1754 bind=bind,
1755 where=where)}
1756 )
1758 # Try a bunch of timespan queries, mixing up the bounds themselves,
1759 # where they appear in the expression, and how we get the timespan into
1760 # the expression.
1762 # t1 is before the start of i1, so this should not include i1.
1763 self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
1764 # t2 is exactly at the start of i2, but ends are exclusive, so these
1765 # should not include i2.
1766 self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
1767 self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
1768 # t3 is in the middle of i3, so this should include i3.
1769 self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
1770 # This one should not include i3, by the same reasoning.
1771 self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
1772 # t4 is exactly i4's (exclusive) end, but i4 begins before t4, so this should include i4.
1773 self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
1774 # i4's upper bound of t4 is exclusive, so this should not include i4.
1775 self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))
1777 # Now some timespan vs. time scalar queries.
1778 self.assertEqual(ids[:i2], query("visit.timespan < t2"))
1779 self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
1780 self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
1781 self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
1782 self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
1783 self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))
1785 # Empty timespans should not overlap anything.
1786 self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))
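# A minimal sketch of the half-open interval semantics exercised above,
# using only the Timespan.overlaps method already used earlier in this
# file (variable names are the local t1...t5):
#
#     span = Timespan(t2, t4)           # begin inclusive, end exclusive
#     span.overlaps(Timespan(t1, t3))   # True: the spans share [t2, t3)
#     span.overlaps(Timespan(t4, t5))   # False: span ends just before t4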
1788 def testCollectionSummaries(self):
1789 """Test recording and retrieval of collection summaries.
1790 """
1791 self.maxDiff = None
1792 registry = self.makeRegistry()
1793 # Importing datasets from YAML should go through the code path where
1794 # we update collection summaries as we insert datasets.
1795 self.loadData(registry, "base.yaml")
1796 self.loadData(registry, "datasets.yaml")
1797 flat = registry.getDatasetType("flat")
1798 expected1 = CollectionSummary.makeEmpty(registry.dimensions)
1799 expected1.datasetTypes.add(registry.getDatasetType("bias"))
1800 expected1.datasetTypes.add(flat)
1801 expected1.dimensions.update_extract(
1802 DataCoordinate.standardize(instrument="Cam1", universe=registry.dimensions)
1803 )
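# (A summary records the dataset types and governor dimension values,
# here just instrument='Cam1', that may appear in a collection; both
# imported runs hold Cam1 biases and flats, so their summaries agree.)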
1804 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1805 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1806 # Create a chained collection with both of the imported runs; the
1807 # summary should be unchanged, since the union of two identical summaries is just that summary.
1808 chain = "chain"
1809 registry.registerCollection(chain, CollectionType.CHAINED)
1810 registry.setCollectionChain(chain, ["imported_r", "imported_g"])
1811 self.assertEqual(registry.getCollectionSummary(chain), expected1)
1812 # Associate flats only into a tagged collection and a calibration
1813 # collection to check summaries of those.
1814 tag = "tag"
1815 registry.registerCollection(tag, CollectionType.TAGGED)
1816 registry.associate(tag, registry.queryDatasets(flat, collections="imported_g"))
1817 calibs = "calibs"
1818 registry.registerCollection(calibs, CollectionType.CALIBRATION)
1819 registry.certify(calibs, registry.queryDatasets(flat, collections="imported_g"),
1820 timespan=Timespan(None, None))
1821 expected2 = expected1.copy()
1822 expected2.datasetTypes.discard("bias")
1823 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1824 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1825 # Explicitly calling Registry.refresh() should load those same
1826 # summaries, via a totally different code path.
1827 registry.refresh()
1828 self.assertEqual(registry.getCollectionSummary("imported_g"), expected1)
1829 self.assertEqual(registry.getCollectionSummary("imported_r"), expected1)
1830 self.assertEqual(registry.getCollectionSummary(tag), expected2)
1831 self.assertEqual(registry.getCollectionSummary(calibs), expected2)
1833 def testUnrelatedDimensionQueries(self):
1834 """Test that WHERE expressions in queries can reference dimensions that
1835 are not in the result set.
1836 """
1837 registry = self.makeRegistry()
1838 # There is no data to back this query, but it should still return
1839 # zero records instead of raising.
1840 self.assertFalse(
1841 set(registry.queryDataIds(["visit", "detector"],
1842 where="instrument='Cam1' AND skymap='not_here' AND tract=0")),
1843 )