Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 5%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTests"]
25from abc import ABC, abstractmethod
26import os
28import astropy.time
29import sqlalchemy
31from ...core import (
32 DataCoordinate,
33 DatasetType,
34 DimensionGraph,
35 StorageClass,
36 ddl,
37 YamlRepoImportBackend
38)
39from .._registry import Registry, CollectionType, ConflictingDefinitionError, OrphanedRecordError
40from ..wildcards import DatasetTypeRestriction
43class RegistryTests(ABC):
44 """Generic tests for the `Registry` class that can be subclassed to
45 generate tests for different configurations.
46 """
    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.

        Returns
        -------
        path : `str`
            Path to the directory whose YAML import/export files are
            consumed by `loadData`.
        """
        raise NotImplementedError()
    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the `Registry` instance to be tested.

        Notes
        -----
        Each test method calls this once to obtain the registry it operates
        on; implementations should return a registry that the test is free
        to modify.
        """
        raise NotImplementedError()
61 def loadData(self, registry: Registry, filename: str):
62 """Load registry test data from ``getDataDir/<filename>``,
63 which should be a YAML import/export file.
64 """
65 with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
66 backend = YamlRepoImportBackend(stream, registry)
67 backend.register()
68 backend.load(datastore=None)
    def assertRowCount(self, registry: Registry, table: str, count: int):
        """Check the number of rows in table.

        Parameters
        ----------
        registry : `Registry`
            Registry whose backing database is inspected.
        table : `str`
            Attribute name of the table on ``registry._tables``.
        count : `int`
            Expected number of rows.
        """
        # TODO: all tests that rely on this method should be rewritten, as it
        # needs to depend on Registry implementation details to have any chance
        # of working.
        # NOTE(review): the list-argument form ``select([...])`` is the legacy
        # SQLAlchemy calling convention (removed in 1.4+) — confirm the pinned
        # SQLAlchemy version before touching this.
        sql = sqlalchemy.sql.select(
            [sqlalchemy.sql.func.count()]
        ).select_from(
            getattr(registry._tables, table)
        )
        self.assertEqual(registry._db.query(sql).scalar(), count)
83 def testOpaque(self):
84 """Tests for `Registry.registerOpaqueTable`,
85 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
86 `Registry.deleteOpaqueData`.
87 """
88 registry = self.makeRegistry()
89 table = "opaque_table_for_testing"
90 registry.registerOpaqueTable(
91 table,
92 spec=ddl.TableSpec(
93 fields=[
94 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
95 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
96 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
97 ],
98 )
99 )
100 rows = [
101 {"id": 1, "name": "one", "count": None},
102 {"id": 2, "name": "two", "count": 5},
103 {"id": 3, "name": "three", "count": 6},
104 ]
105 registry.insertOpaqueData(table, *rows)
106 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
107 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
108 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
109 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
110 registry.deleteOpaqueData(table, id=3)
111 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
112 registry.deleteOpaqueData(table)
113 self.assertEqual([], list(registry.fetchOpaqueData(table)))
115 def testDatasetType(self):
116 """Tests for `Registry.registerDatasetType` and
117 `Registry.getDatasetType`.
118 """
119 registry = self.makeRegistry()
120 # Check valid insert
121 datasetTypeName = "test"
122 storageClass = StorageClass("testDatasetType")
123 registry.storageClasses.registerStorageClass(storageClass)
124 dimensions = registry.dimensions.extract(("instrument", "visit"))
125 differentDimensions = registry.dimensions.extract(("instrument", "patch"))
126 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
127 # Inserting for the first time should return True
128 self.assertTrue(registry.registerDatasetType(inDatasetType))
129 outDatasetType1 = registry.getDatasetType(datasetTypeName)
130 self.assertEqual(outDatasetType1, inDatasetType)
132 # Re-inserting should work
133 self.assertFalse(registry.registerDatasetType(inDatasetType))
134 # Except when they are not identical
135 with self.assertRaises(ConflictingDefinitionError):
136 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
137 registry.registerDatasetType(nonIdenticalDatasetType)
139 # Template can be None
140 datasetTypeName = "testNoneTemplate"
141 storageClass = StorageClass("testDatasetType2")
142 registry.storageClasses.registerStorageClass(storageClass)
143 dimensions = registry.dimensions.extract(("instrument", "visit"))
144 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
145 registry.registerDatasetType(inDatasetType)
146 outDatasetType2 = registry.getDatasetType(datasetTypeName)
147 self.assertEqual(outDatasetType2, inDatasetType)
149 allTypes = set(registry.queryDatasetTypes())
150 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
152 def testDimensions(self):
153 """Tests for `Registry.insertDimensionData` and
154 `Registry.expandDataId`.
155 """
156 registry = self.makeRegistry()
157 dimensionName = "instrument"
158 dimension = registry.dimensions[dimensionName]
159 dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
160 "class_name": "lsst.obs.base.Instrument"}
161 registry.insertDimensionData(dimensionName, dimensionValue)
162 # Inserting the same value twice should fail
163 with self.assertRaises(sqlalchemy.exc.IntegrityError):
164 registry.insertDimensionData(dimensionName, dimensionValue)
165 # expandDataId should retrieve the record we just inserted
166 self.assertEqual(
167 registry.expandDataId(
168 instrument="DummyCam",
169 graph=dimension.graph
170 ).records[dimensionName].toDict(),
171 dimensionValue
172 )
173 # expandDataId should raise if there is no record with the given ID.
174 with self.assertRaises(LookupError):
175 registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
176 # abstract_filter doesn't have a table; insert should fail.
177 with self.assertRaises(TypeError):
178 registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
179 dimensionName2 = "physical_filter"
180 dimension2 = registry.dimensions[dimensionName2]
181 dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
182 # Missing required dependency ("instrument") should fail
183 with self.assertRaises(sqlalchemy.exc.IntegrityError):
184 registry.insertDimensionData(dimensionName2, dimensionValue2)
185 # Adding required dependency should fix the failure
186 dimensionValue2["instrument"] = "DummyCam"
187 registry.insertDimensionData(dimensionName2, dimensionValue2)
188 # expandDataId should retrieve the record we just inserted.
189 self.assertEqual(
190 registry.expandDataId(
191 instrument="DummyCam", physical_filter="DummyCam_i",
192 graph=dimension2.graph
193 ).records[dimensionName2].toDict(),
194 dimensionValue2
195 )
197 def testDataset(self):
198 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
199 and `Registry.removeDataset`.
200 """
201 registry = self.makeRegistry()
202 self.loadData(registry, "base.yaml")
203 run = "test"
204 registry.registerRun(run)
205 datasetType = registry.getDatasetType("permabias")
206 dataId = {"instrument": "Cam1", "detector": 2}
207 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
208 outRef = registry.getDataset(ref.id)
209 self.assertIsNotNone(ref.id)
210 self.assertEqual(ref, outRef)
211 with self.assertRaises(ConflictingDefinitionError):
212 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
213 registry.removeDataset(ref)
214 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
216 def testComponents(self):
217 """Tests for `Registry.attachComponent` and other dataset operations
218 on composite datasets.
219 """
220 registry = self.makeRegistry()
221 self.loadData(registry, "base.yaml")
222 run = "test"
223 registry.registerRun(run)
224 parentDatasetType = registry.getDatasetType("permabias")
225 childDatasetType1 = registry.getDatasetType("permabias.image")
226 childDatasetType2 = registry.getDatasetType("permabias.mask")
227 dataId = {"instrument": "Cam1", "detector": 2}
228 parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run)
229 children = {"image": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0],
230 "mask": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]}
231 for name, child in children.items():
232 registry.attachComponent(name, parent, child)
233 self.assertEqual(parent.components, children)
234 outParent = registry.getDataset(parent.id)
235 self.assertEqual(outParent.components, children)
236 # Remove the parent; this should remove all children.
237 registry.removeDataset(parent)
238 self.assertIsNone(registry.findDataset(parentDatasetType, dataId, collections=[run]))
239 self.assertIsNone(registry.findDataset(childDatasetType1, dataId, collections=[run]))
240 self.assertIsNone(registry.findDataset(childDatasetType2, dataId, collections=[run]))
242 def testFindDataset(self):
243 """Tests for `Registry.findDataset`.
244 """
245 registry = self.makeRegistry()
246 self.loadData(registry, "base.yaml")
247 run = "test"
248 datasetType = registry.getDatasetType("permabias")
249 dataId = {"instrument": "Cam1", "detector": 4}
250 registry.registerRun(run)
251 inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
252 outputRef = registry.findDataset(datasetType, dataId, collections=[run])
253 self.assertEqual(outputRef, inputRef)
254 # Check that retrieval with invalid dataId raises
255 with self.assertRaises(LookupError):
256 dataId = {"instrument": "Cam1"} # no detector
257 registry.findDataset(datasetType, dataId, collections=run)
258 # Check that different dataIds match to different datasets
259 dataId1 = {"instrument": "Cam1", "detector": 1}
260 inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
261 dataId2 = {"instrument": "Cam1", "detector": 2}
262 inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
263 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
264 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
265 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
266 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
267 # Check that requesting a non-existing dataId returns None
268 nonExistingDataId = {"instrument": "Cam1", "detector": 3}
269 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
    def testCollections(self):
        """Tests for registry methods that manage collections: runs, tagged
        collections, and chained collections with dataset-type restrictions.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "permabias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new TAGGED collection, then look for them
        # there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error, because
        # it would make the (dataset type, data ID) pair ambiguous there.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time
        # alongside a dataset that isn't in the collection and won't cause a
        # conflict.  Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches:
        # 1. 'tag1'
        # 2. 'run1', but only for the permaflat dataset
        # 3. 'run2'
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [(tag1, DatasetTypeRestriction.any),
             (run1, DatasetTypeRestriction.fromExpression("permaflat")),
             (run2, DatasetTypeRestriction.any)]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for permabias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. skip run1, because that element of the chain is restricted to
        #    permaflat
        # 3. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Look in the chain for a permaflat that is in run1; should get the
        # same ref as if we'd searched run1 directly.
        dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
        self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                         registry.findDataset("permaflat", dataId3, collections=run1),)
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
        # Search for permabias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for permabias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a permaflat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to permabias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))
387 def testDatasetLocations(self):
388 """Tests for `Registry.insertDatasetLocations`,
389 `Registry.getDatasetLocations`, and `Registry.removeDatasetLocations`.
390 """
391 registry = self.makeRegistry()
392 self.loadData(registry, "base.yaml")
393 self.loadData(registry, "datasets.yaml")
394 run = "imported_g"
395 ref = registry.findDataset("permabias", dataId={"instrument": "Cam1", "detector": 1}, collections=run)
396 ref2 = registry.findDataset("permaflat",
397 dataId={"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-G"},
398 collections=run)
399 datastoreName = "dummystore"
400 datastoreName2 = "dummystore2"
401 # Test adding information about a new dataset
402 registry.insertDatasetLocations(datastoreName, [ref])
403 addresses = registry.getDatasetLocations(ref)
404 self.assertIn(datastoreName, addresses)
405 self.assertEqual(len(addresses), 1)
406 registry.insertDatasetLocations(datastoreName2, [ref, ref2])
407 addresses = registry.getDatasetLocations(ref)
408 self.assertEqual(len(addresses), 2)
409 self.assertIn(datastoreName, addresses)
410 self.assertIn(datastoreName2, addresses)
411 registry.removeDatasetLocation(datastoreName, ref)
412 addresses = registry.getDatasetLocations(ref)
413 self.assertEqual(len(addresses), 1)
414 self.assertNotIn(datastoreName, addresses)
415 self.assertIn(datastoreName2, addresses)
416 with self.assertRaises(OrphanedRecordError):
417 registry.removeDataset(ref)
418 registry.removeDatasetLocation(datastoreName2, ref)
419 addresses = registry.getDatasetLocations(ref)
420 self.assertEqual(len(addresses), 0)
421 self.assertNotIn(datastoreName2, addresses)
422 registry.removeDataset(ref) # should not raise
423 addresses = registry.getDatasetLocations(ref2)
424 self.assertEqual(len(addresses), 1)
425 self.assertIn(datastoreName2, addresses)
427 def testBasicTransaction(self):
428 """Test that all operations within a single transaction block are
429 rolled back if an exception propagates out of the block.
430 """
431 registry = self.makeRegistry()
432 storageClass = StorageClass("testDatasetType")
433 registry.storageClasses.registerStorageClass(storageClass)
434 dimensions = registry.dimensions.extract(("instrument",))
435 dataId = {"instrument": "DummyCam"}
436 datasetTypeA = DatasetType(name="A",
437 dimensions=dimensions,
438 storageClass=storageClass)
439 datasetTypeB = DatasetType(name="B",
440 dimensions=dimensions,
441 storageClass=storageClass)
442 datasetTypeC = DatasetType(name="C",
443 dimensions=dimensions,
444 storageClass=storageClass)
445 run = "test"
446 registry.registerRun(run)
447 refId = None
448 with registry.transaction():
449 registry.registerDatasetType(datasetTypeA)
450 with self.assertRaises(ValueError):
451 with registry.transaction():
452 registry.registerDatasetType(datasetTypeB)
453 registry.registerDatasetType(datasetTypeC)
454 registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
455 ref, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run)
456 refId = ref.id
457 raise ValueError("Oops, something went wrong")
458 # A should exist
459 self.assertEqual(registry.getDatasetType("A"), datasetTypeA)
460 # But B and C should both not exist
461 with self.assertRaises(KeyError):
462 registry.getDatasetType("B")
463 with self.assertRaises(KeyError):
464 registry.getDatasetType("C")
465 # And neither should the dataset
466 self.assertIsNotNone(refId)
467 self.assertIsNone(registry.getDataset(refId))
468 # Or the Dimension entries
469 with self.assertRaises(LookupError):
470 registry.expandDataId({"instrument": "DummyCam"})
472 def testNestedTransaction(self):
473 """Test that operations within a transaction block are not rolled back
474 if an exception propagates out of an inner transaction block and is
475 then caught.
476 """
477 registry = self.makeRegistry()
478 dimension = registry.dimensions["instrument"]
479 dataId1 = {"instrument": "DummyCam"}
480 dataId2 = {"instrument": "DummyCam2"}
481 checkpointReached = False
482 with registry.transaction():
483 # This should be added and (ultimately) committed.
484 registry.insertDimensionData(dimension, dataId1)
485 with self.assertRaises(sqlalchemy.exc.IntegrityError):
486 with registry.transaction():
487 # This does not conflict, and should succeed (but not
488 # be committed).
489 registry.insertDimensionData(dimension, dataId2)
490 checkpointReached = True
491 # This should conflict and raise, triggerring a rollback
492 # of the previous insertion within the same transaction
493 # context, but not the original insertion in the outer
494 # block.
495 registry.insertDimensionData(dimension, dataId1)
496 self.assertTrue(checkpointReached)
497 self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
498 with self.assertRaises(LookupError):
499 registry.expandDataId(dataId2, graph=dimension.graph)
    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()
        # Need a bunch of dimensions and datasets for the test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            # NOTE(review): visit 20 is named "twelve" — looks like a typo in
            # the fixture, but nothing below depends on the name.
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        # Two exposures per visit.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", visit=20, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", visit=20, physical_filter="dummy_r"),
        )
        # Collections and dataset types.
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        # No explicit type argument here — presumably the default collection
        # type is TAGGED; confirm against registerCollection's signature.
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure",
                                                                      "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        # Add pre-existing datasets.
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections;
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dim string works as well as a list of str.
        rows = list(registry.queryDimensions("visit", datasets=rawType, collections=run1, expand=True))
        rowsI = list(registry.queryDimensions(["visit"], datasets=rawType, collections=run1, expand=True))
        self.assertEqual(rows, rowsI)
        # With empty expression.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1, expand=True))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            # Each packer should round-trip its own data ID...
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            # ...but the two packings should differ from each other.
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
        # Second collection.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=tagged2))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
        # With two input collections.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=[run1, tagged2]))
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
        # Limit to a single visit.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
        # More limiting expression, using link names instead of Table.column.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10 and detector > 1"))
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))
        # Expression that excludes everything.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit > 1000"))
        self.assertEqual(len(rows), 0)
        # Selecting by physical_filter: this is not in the requested
        # dimensions, but it is a part of the full expression so it should
        # work too.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="physical_filter = 'dummy_r'"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()
        # need a bunch of dimensions and datasets for test, we want
        # "abstract_filter" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        # 10 tracts with 10 patches each; only a subset gets datasets below.
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )
        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        # measType is registered but never gets datasets; it only contributes
        # its dimensions to the query graph below.
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )
        # add pre-existing datasets: 3 tracts x 4 patches of mergeDet, and a
        # calexp for each of those in each of the two abstract filters
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)
        # with empty expression
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run))
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))
        # limit to 2 tracts and 2 patches
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="tract IN (1, 5) AND patch IN (2, 7)"))
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))
        # limit to single filter
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="abstract_filter = 'i'"))
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))
        # expression excludes everything, specifying non-existing skymap is
        # not a fatal error, it's operator error
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="skymap = 'Mars'"))
        self.assertEqual(len(rows), 0)
747 def testSpatialMatch(self):
748 """Test involving spatial match using join tables.
750 Note that realistic test needs a reasonably-defined skypix and regions
751 in registry tables which is hard to implement in this simple test.
752 So we do not actually fill registry with any data and all queries will
753 return empty result, but this is still useful for coverage of the code
754 that generates query.
755 """
756 registry = self.makeRegistry()
758 # dataset types
759 collection = "test"
760 registry.registerRun(name=collection)
761 storageClass = StorageClass("testDataset")
762 registry.storageClasses.registerStorageClass(storageClass)
764 calexpType = DatasetType(name="CALEXP",
765 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
766 storageClass=storageClass)
767 registry.registerDatasetType(calexpType)
769 coaddType = DatasetType(name="deepCoadd_calexp",
770 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
771 "abstract_filter")),
772 storageClass=storageClass)
773 registry.registerDatasetType(coaddType)
775 dimensions = DimensionGraph(
776 registry.dimensions,
777 dimensions=(calexpType.dimensions.required | coaddType.dimensions.required)
778 )
780 # without data this should run OK but return empty set
781 rows = list(registry.queryDimensions(dimensions, datasets=calexpType, collections=collection))
782 self.assertEqual(len(rows), 0)
784 def testCalibrationLabelIndirection(self):
785 """Test that we can look up datasets with calibration_label dimensions
786 from a data ID with exposure dimensions.
787 """
789 def _dt(iso_string):
790 return astropy.time.Time(iso_string, format="iso", scale="utc")
792 registry = self.makeRegistry()
794 flat = DatasetType(
795 "flat",
796 registry.dimensions.extract(
797 ["instrument", "detector", "physical_filter", "calibration_label"]
798 ),
799 "ImageU"
800 )
801 registry.registerDatasetType(flat)
802 registry.insertDimensionData("instrument", dict(name="DummyCam"))
803 registry.insertDimensionData(
804 "physical_filter",
805 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
806 )
807 registry.insertDimensionData(
808 "detector",
809 *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
810 )
811 registry.insertDimensionData(
812 "visit",
813 dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
814 dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
815 )
816 registry.insertDimensionData(
817 "exposure",
818 dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i",
819 datetime_begin=_dt("2005-12-15 02:00:00"), datetime_end=_dt("2005-12-15 03:00:00")),
820 dict(instrument="DummyCam", id=101, name="101", visit=11, physical_filter="dummy_i",
821 datetime_begin=_dt("2005-12-16 02:00:00"), datetime_end=_dt("2005-12-16 03:00:00")),
822 )
823 registry.insertDimensionData(
824 "calibration_label",
825 dict(instrument="DummyCam", name="first_night",
826 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-15 04:00:00")),
827 dict(instrument="DummyCam", name="second_night",
828 datetime_begin=_dt("2005-12-16 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
829 dict(instrument="DummyCam", name="both_nights",
830 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
831 )
832 # Different flats for different nights for detectors 1-3 in first
833 # collection.
834 run1 = "calibs1"
835 registry.registerRun(run1)
836 for detector in (1, 2, 3):
837 registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
838 physical_filter="dummy_i", detector=detector)],
839 run=run1)
840 registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
841 physical_filter="dummy_i", detector=detector)],
842 run=run1)
843 # The same flat for both nights for detectors 3-5 (so detector 3 has
844 # multiple valid flats) in second collection.
845 run2 = "calib2"
846 registry.registerRun(run2)
847 for detector in (3, 4, 5):
848 registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
849 physical_filter="dummy_i", detector=detector)],
850 run=run2)
851 # Perform queries for individual exposure+detector combinations, which
852 # should always return exactly one flat.
853 for exposure in (100, 101):
854 for detector in (1, 2, 3):
855 with self.subTest(exposure=exposure, detector=detector):
856 rows = list(registry.queryDatasets("flat", collections=[run1],
857 instrument="DummyCam",
858 exposure=exposure,
859 detector=detector))
860 self.assertEqual(len(rows), 1)
861 for detector in (3, 4, 5):
862 with self.subTest(exposure=exposure, detector=detector):
863 rows = registry.queryDatasets("flat", collections=[run2],
864 instrument="DummyCam",
865 exposure=exposure,
866 detector=detector)
867 self.assertEqual(len(list(rows)), 1)
868 for detector in (1, 2, 4, 5):
869 with self.subTest(exposure=exposure, detector=detector):
870 rows = registry.queryDatasets("flat", collections=[run1, run2],
871 instrument="DummyCam",
872 exposure=exposure,
873 detector=detector)
874 self.assertEqual(len(list(rows)), 1)
875 for detector in (3,):
876 with self.subTest(exposure=exposure, detector=detector):
877 rows = registry.queryDatasets("flat", collections=[run1, run2],
878 instrument="DummyCam",
879 exposure=exposure,
880 detector=detector)
881 self.assertEqual(len(list(rows)), 2)
883 def testAbstractFilterQuery(self):
884 """Test that we can run a query that just lists the known
885 abstract_filters. This is tricky because abstract_filter is
886 backed by a query against physical_filter.
887 """
888 registry = self.makeRegistry()
889 registry.insertDimensionData("instrument", dict(name="DummyCam"))
890 registry.insertDimensionData(
891 "physical_filter",
892 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
893 dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
894 dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
895 )
896 rows = list(registry.queryDimensions(["abstract_filter"]))
897 self.assertCountEqual(
898 rows,
899 [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions),
900 DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)]
901 )