# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import logging
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from ..interfaces import MissingCollectionError, ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
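
    Notes
    -----
    A minimal concrete subclass is sketched below; the class name, the
    in-memory SQLite URI, and the use of `Registry.fromConfig` are
    illustrative assumptions, not requirements of this interface::

        class MyRegistryTestCase(RegistryTests, unittest.TestCase):

            @classmethod
            def getDataDir(cls) -> str:
                return os.path.join(os.path.dirname(__file__), "data")

            def makeRegistry(self) -> Registry:
                config = self.makeRegistryConfig()
                config["db"] = "sqlite://"  # assumption: in-memory SQLite
                return Registry.fromConfig(config, create=True)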
68 """
    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
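
    For illustration only (an assumption, not a requirement of this class),
    a subclass might set this to one of the manager classes shipped with
    daf_butler, e.g.
    ``lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager``.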
74 """
    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
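
        The test methods in this class load files such as ``base.yaml`` and
        ``datasets.yaml`` from `getDataDir`, e.g.::

            self.loadData(registry, "base.yaml")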
106 """
107 from ...transfers import YamlRepoImportBackend
108 with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
109 backend = YamlRepoImportBackend(stream, registry)
110 backend.register()
111 backend.load(datastore=None)
    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again. Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components. Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component. In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )
        # Add a dataset type using a StorageClass that we'll then remove;
        # check that this does not affect our ability to query for dataset
        # types (though it will warn).
        tempStorageClass = StorageClass(
            name="TempStorageClass",
            components={"data": registry.storageClasses.getStorageClass("StructuredDataDict")}
        )
        registry.storageClasses.registerStorageClass(tempStorageClass)
        datasetType = DatasetType("temporary", dimensions=["instrument"], storageClass=tempStorageClass,
                                  universe=registry.dimensions)
        registry.registerDatasetType(datasetType)
        registry.storageClasses._unregisterStorageClass(tempStorageClass.name)
        datasetType._storageClass = None
        del tempStorageClass
        # Querying for all dataset types, including components, should include
        # at least all non-component dataset types (and I don't want to
        # enumerate all of the Exposure components for bias and flat here).
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            everything = NamedValueSet(registry.queryDatasetTypes(components=True))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertLess({"bias", "flat", "temporary"}, everything.names)
        # It should not include "temporary.data", because we tried to remove
        # the storage class that would tell it about that. So if the next
        # line fails (i.e. "temporary.data" _is_ in everything.names), it
        # means this part of the test isn't doing anything, because the
        # _unregister call above isn't simulating the real-life case we want
        # it to simulate, in which different versions of daf_butler in
        # entirely different Python processes interact with the same repo.
        self.assertNotIn("temporary.data", everything.names)
        # Query for dataset types that start with "temp". This should again
        # not include the component, and also not fail.
        with self.assertLogs("lsst.daf.butler.registry._registry", logging.WARN) as cm:
            startsWithTemp = NamedValueSet(registry.queryDatasetTypes(re.compile("temp.*")))
        self.assertIn("TempStorageClass", cm.output[0])
        self.assertEqual({"temporary"}, startsWithTemp.names)

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time along
        # with a dataset that isn't in the collection and won't itself cause
        # a conflict. Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1
        # and ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1. The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2. That should not be found
        # at the front of chain2, because of the restriction to bias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1 and 'DummyCam'=instrument").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the dimensions, but
        # is a part of the full expression, so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'", instrument="DummyCam").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test; we want
        # "band" in the test, so we also have to add physical_filter
        # dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # Dataset types.
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # Add pre-existing datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # With empty expression.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to 2 tracts and 2 patches.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)", skymap="DummyMap").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # Limit to a single filter.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # An expression that excludes everything; specifying a non-existing
        # skymap is not a fatal error, just an operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct the expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        # (Use a distinct loop-variable name to avoid shadowing the outer
        # `regions` dict.)
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, elementRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in elementRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # number of attributes with schema versions in a fresh database,
        # 6 managers with 3 records per manager, plus config for dimensions
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
            instrument="Cam1",
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)


    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, one dataset in each.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
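        # A dataset type with no dimensions has exactly one possible data ID:
        # the empty data coordinate made above.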
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both datasets at once, and then for each one
        # alone by varying the collection search order.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2]
        )
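        # With findFirst=True the collection search order matters: the dataset
        # from the first collection searched is the one returned.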
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one. Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Setup - make a Registry, fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
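        # Because ``None`` appears at both ends of the input list, the
        # combinations include (None, t_i), (t_i, None), and (None, None):
        # half-unbounded and fully unbounded timespans.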
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that window,
        # because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
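        # Certified validity ranges are now (begins inclusive, ends exclusive):
        #   detector 2: bias2a over [t2, t4), bias2b over [t4, ∞)
        #   detector 3: bias3a over [t1, t3), bias3b over [t4, ∞)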

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the given
            expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )
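
        # A lookup timespan that overlaps two different certifications for the
        # same data ID cannot be resolved to a single dataset, so it should
        # raise rather than return either one.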
        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and do test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
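        # Certified validity ranges are now:
        #   detector 2: bias2a over [t2, t3), bias2b over [t5, ∞)
        #   detector 3: bias3a over [t1, t3), bias3b over [t5, ∞)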
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞), check that all lookups return
        # those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2a over [t2, t4).
        # This should split a single certification row into two (and leave the
        # other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testIngestTimeQuery(self):
        """Test queries that constrain results by dataset ingest time.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)
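
        # Every dataset in the test repo was ingested after 2000-01-01, so a
        # cut there should keep everything.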
        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # no one will ever use this piece of software in 30 years
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)

    def testTimespanQueries(self):
        """Test query expressions involving timespans.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")
        # All visits in the database; mapping from ID to timespan.
        visits = {record.id: record.timespan for record in registry.queryDimensionRecords("visit")}
        # Just those IDs, sorted (which is also temporal sorting, because HSC
        # visit IDs are monotonically increasing).
        ids = sorted(visits.keys())
        self.assertGreater(len(ids), 20)
        # Pick some quasi-random indexes into `ids` to play with.
        i1 = int(len(ids)*0.1)
        i2 = int(len(ids)*0.3)
        i3 = int(len(ids)*0.6)
        i4 = int(len(ids)*0.8)
        # Extract some times from those: just before the beginning of i1 (which
        # should be after the end of the previous visit), exactly the
        # beginning of i2, just after the beginning of i3 (and before its end),
        # and the exact end of i4.
        t1 = visits[ids[i1]].begin - astropy.time.TimeDelta(1.0, format="sec")
        self.assertGreater(t1, visits[ids[i1 - 1]].end)
        t2 = visits[ids[i2]].begin
        t3 = visits[ids[i3]].begin + astropy.time.TimeDelta(1.0, format="sec")
        self.assertLess(t3, visits[ids[i3]].end)
        t4 = visits[ids[i4]].end
        # Make sure those are actually in order.
        self.assertEqual([t1, t2, t3, t4], sorted([t4, t3, t2, t1]))
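
        # ``bind`` lets the astropy times and the Timespan be referenced by
        # name in the ``where`` expressions below (e.g. ``ts23``) without any
        # string formatting.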
        bind = {
            "t1": t1,
            "t2": t2,
            "t3": t3,
            "t4": t4,
            "ts23": Timespan(t2, t3),
        }

        def query(where):
            """Helper function that queries for visit data IDs and returns
            results as a sorted, deduplicated list of visit IDs.
            """
            return sorted(
                {dataId["visit"] for dataId in registry.queryDataIds("visit",
                                                                     instrument="HSC",
                                                                     bind=bind,
                                                                     where=where)}
            )

        # Try a bunch of timespan queries, mixing up the bounds themselves,
        # where they appear in the expression, and how we get the timespan into
        # the expression.

        # t1 is before the start of i1, so this should not include i1.
        self.assertEqual(ids[:i1], query("visit.timespan OVERLAPS (null, t1)"))
        # t2 is exactly at the start of i2, but ends are exclusive, so these
        # should not include i2.
        self.assertEqual(ids[i1:i2], query("(t1, t2) OVERLAPS visit.timespan"))
        self.assertEqual(ids[:i2], query("visit.timespan < (t2, t4)"))
        # t3 is in the middle of i3, so this should include i3.
        self.assertEqual(ids[i2:i3 + 1], query("visit.timespan OVERLAPS ts23"))
        # This one should not include i3, by the same reasoning.
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > (t1, t3)"))
        # t4 is exactly at the end of i4, so this should include i4.
        self.assertEqual(ids[i3:i4 + 1], query(f"visit.timespan OVERLAPS (T'{t3.tai.isot}', t4)"))
        # i4's upper bound of t4 is exclusive, so this should not include i4.
        self.assertEqual(ids[i4 + 1:], query("visit.timespan OVERLAPS (t4, NULL)"))

        # Now some timespan vs. time scalar queries.
        self.assertEqual(ids[:i2], query("visit.timespan < t2"))
        self.assertEqual(ids[:i2], query("t2 > visit.timespan"))
        self.assertEqual(ids[i3 + 1:], query("visit.timespan > t3"))
        self.assertEqual(ids[i3 + 1:], query("t3 < visit.timespan"))
        self.assertEqual(ids[i3:i3 + 1], query("visit.timespan OVERLAPS t3"))
        self.assertEqual(ids[i3:i3 + 1], query(f"T'{t3.tai.isot}' OVERLAPS visit.timespan"))

        # Empty timespans should not overlap anything.
        self.assertEqual([], query("visit.timespan OVERLAPS (t3, t2)"))