Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 5%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTests"]
25from abc import ABC, abstractmethod
26import os
28import astropy.time
29import sqlalchemy
31from ...core import (
32 DataCoordinate,
33 DatasetType,
34 DimensionGraph,
35 StorageClass,
36 ddl,
37 YamlRepoImportBackend
38)
39from .._registry import Registry, CollectionType, ConflictingDefinitionError, OrphanedRecordError
40from ..wildcards import DatasetTypeRestriction
43class RegistryTests(ABC):
44 """Generic tests for the `Registry` class that can be subclassed to
45 generate tests for different configurations.
46 """
48 @classmethod
49 @abstractmethod
50 def getDataDir(cls) -> str:
51 """Return the root directory containing test data YAML files.
52 """
53 raise NotImplementedError()
55 @abstractmethod
56 def makeRegistry(self) -> Registry:
57 """Return the Registry instance to be tested.
58 """
59 raise NotImplementedError()
61 def loadData(self, registry: Registry, filename: str):
62 """Load registry test data from ``getDataDir/<filename>``,
63 which should be a YAML import/export file.
64 """
65 with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
66 backend = YamlRepoImportBackend(stream, registry)
67 backend.register()
68 backend.load(datastore=None)
70 def assertRowCount(self, registry: Registry, table: str, count: int):
71 """Check the number of rows in table.
72 """
73 # TODO: all tests that rely on this method should be rewritten, as it
74 # needs to depend on Registry implementation details to have any chance
75 # of working.
76 sql = sqlalchemy.sql.select(
77 [sqlalchemy.sql.func.count()]
78 ).select_from(
79 getattr(registry._tables, table)
80 )
81 self.assertEqual(registry._db.query(sql).scalar(), count)
83 def testOpaque(self):
84 """Tests for `Registry.registerOpaqueTable`,
85 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
86 `Registry.deleteOpaqueData`.
87 """
88 registry = self.makeRegistry()
89 table = "opaque_table_for_testing"
90 registry.registerOpaqueTable(
91 table,
92 spec=ddl.TableSpec(
93 fields=[
94 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
95 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
96 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
97 ],
98 )
99 )
100 rows = [
101 {"id": 1, "name": "one", "count": None},
102 {"id": 2, "name": "two", "count": 5},
103 {"id": 3, "name": "three", "count": 6},
104 ]
105 registry.insertOpaqueData(table, *rows)
106 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
107 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
108 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
109 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
110 registry.deleteOpaqueData(table, id=3)
111 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
112 registry.deleteOpaqueData(table)
113 self.assertEqual([], list(registry.fetchOpaqueData(table)))
115 def testDatasetType(self):
116 """Tests for `Registry.registerDatasetType` and
117 `Registry.getDatasetType`.
118 """
119 registry = self.makeRegistry()
120 # Check valid insert
121 datasetTypeName = "test"
122 storageClass = StorageClass("testDatasetType")
123 registry.storageClasses.registerStorageClass(storageClass)
124 dimensions = registry.dimensions.extract(("instrument", "visit"))
125 differentDimensions = registry.dimensions.extract(("instrument", "patch"))
126 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
127 # Inserting for the first time should return True
128 self.assertTrue(registry.registerDatasetType(inDatasetType))
129 outDatasetType1 = registry.getDatasetType(datasetTypeName)
130 self.assertEqual(outDatasetType1, inDatasetType)
132 # Re-inserting should work
133 self.assertFalse(registry.registerDatasetType(inDatasetType))
134 # Except when they are not identical
135 with self.assertRaises(ConflictingDefinitionError):
136 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
137 registry.registerDatasetType(nonIdenticalDatasetType)
139 # Template can be None
140 datasetTypeName = "testNoneTemplate"
141 storageClass = StorageClass("testDatasetType2")
142 registry.storageClasses.registerStorageClass(storageClass)
143 dimensions = registry.dimensions.extract(("instrument", "visit"))
144 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
145 registry.registerDatasetType(inDatasetType)
146 outDatasetType2 = registry.getDatasetType(datasetTypeName)
147 self.assertEqual(outDatasetType2, inDatasetType)
149 allTypes = set(registry.queryDatasetTypes())
150 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
152 def testDimensions(self):
153 """Tests for `Registry.insertDimensionData` and
154 `Registry.expandDataId`.
155 """
156 registry = self.makeRegistry()
157 dimensionName = "instrument"
158 dimension = registry.dimensions[dimensionName]
159 dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2}
160 registry.insertDimensionData(dimensionName, dimensionValue)
161 # Inserting the same value twice should fail
162 with self.assertRaises(sqlalchemy.exc.IntegrityError):
163 registry.insertDimensionData(dimensionName, dimensionValue)
164 # expandDataId should retrieve the record we just inserted
165 self.assertEqual(
166 registry.expandDataId(
167 instrument="DummyCam",
168 graph=dimension.graph
169 ).records[dimensionName].toDict(),
170 dimensionValue
171 )
172 # expandDataId should raise if there is no record with the given ID.
173 with self.assertRaises(LookupError):
174 registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
175 # abstract_filter doesn't have a table; insert should fail.
176 with self.assertRaises(TypeError):
177 registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
178 dimensionName2 = "physical_filter"
179 dimension2 = registry.dimensions[dimensionName2]
180 dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
181 # Missing required dependency ("instrument") should fail
182 with self.assertRaises(sqlalchemy.exc.IntegrityError):
183 registry.insertDimensionData(dimensionName2, dimensionValue2)
184 # Adding required dependency should fix the failure
185 dimensionValue2["instrument"] = "DummyCam"
186 registry.insertDimensionData(dimensionName2, dimensionValue2)
187 # expandDataId should retrieve the record we just inserted.
188 self.assertEqual(
189 registry.expandDataId(
190 instrument="DummyCam", physical_filter="DummyCam_i",
191 graph=dimension2.graph
192 ).records[dimensionName2].toDict(),
193 dimensionValue2
194 )
196 def testDataset(self):
197 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
198 and `Registry.removeDataset`.
199 """
200 registry = self.makeRegistry()
201 self.loadData(registry, "base.yaml")
202 run = "test"
203 registry.registerRun(run)
204 datasetType = registry.getDatasetType("permabias")
205 dataId = {"instrument": "Cam1", "detector": 2}
206 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
207 outRef = registry.getDataset(ref.id)
208 self.assertIsNotNone(ref.id)
209 self.assertEqual(ref, outRef)
210 with self.assertRaises(ConflictingDefinitionError):
211 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
212 registry.removeDataset(ref)
213 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
215 def testComponents(self):
216 """Tests for `Registry.attachComponent` and other dataset operations
217 on composite datasets.
218 """
219 registry = self.makeRegistry()
220 self.loadData(registry, "base.yaml")
221 run = "test"
222 registry.registerRun(run)
223 parentDatasetType = registry.getDatasetType("permabias")
224 childDatasetType1 = registry.getDatasetType("permabias.image")
225 childDatasetType2 = registry.getDatasetType("permabias.mask")
226 dataId = {"instrument": "Cam1", "detector": 2}
227 parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run)
228 children = {"image": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0],
229 "mask": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]}
230 for name, child in children.items():
231 registry.attachComponent(name, parent, child)
232 self.assertEqual(parent.components, children)
233 outParent = registry.getDataset(parent.id)
234 self.assertEqual(outParent.components, children)
235 # Remove the parent; this should remove all children.
236 registry.removeDataset(parent)
237 self.assertIsNone(registry.findDataset(parentDatasetType, dataId, collections=[run]))
238 self.assertIsNone(registry.findDataset(childDatasetType1, dataId, collections=[run]))
239 self.assertIsNone(registry.findDataset(childDatasetType2, dataId, collections=[run]))
241 def testFindDataset(self):
242 """Tests for `Registry.findDataset`.
243 """
244 registry = self.makeRegistry()
245 self.loadData(registry, "base.yaml")
246 run = "test"
247 datasetType = registry.getDatasetType("permabias")
248 dataId = {"instrument": "Cam1", "detector": 4}
249 registry.registerRun(run)
250 inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
251 outputRef = registry.findDataset(datasetType, dataId, collections=[run])
252 self.assertEqual(outputRef, inputRef)
253 # Check that retrieval with invalid dataId raises
254 with self.assertRaises(LookupError):
255 dataId = {"instrument": "Cam1"} # no detector
256 registry.findDataset(datasetType, dataId, collections=run)
257 # Check that different dataIds match to different datasets
258 dataId1 = {"instrument": "Cam1", "detector": 1}
259 inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
260 dataId2 = {"instrument": "Cam1", "detector": 2}
261 inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
262 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
263 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
264 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
265 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
266 # Check that requesting a non-existing dataId returns None
267 nonExistingDataId = {"instrument": "Cam1", "detector": 3}
268 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
    def testCollections(self):
        """Tests for registry methods that manage collections.

        Covers TAGGED collections (associate/disassociate), CHAINED
        collections with per-collection dataset-type restrictions, and
        recursive chains.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "permabias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new TAGGED collection, then look for them
        # there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error, because
        # a TAGGED collection can hold at most one dataset per (type, data ID).
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches:
        # 1. 'tag1'
        # 2. 'run1', but only for the permaflat dataset
        # 3. 'run2'
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [(tag1, DatasetTypeRestriction.any),
             (run1, DatasetTypeRestriction.fromExpression("permaflat")),
             (run2, DatasetTypeRestriction.any)]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for permabias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. not look in run1, because that element of the chain is
        #    restricted to permaflat
        # 3. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Look in the chain for a permaflat that is in run1; should get the
        # same ref as if we'd searched run1 directly.
        dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
        self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                         registry.findDataset("permaflat", dataId3, collections=run1),)
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
        # Search for permabias with dataId1 should find it via tag1 inside
        # chain2 (recursing into chain1), because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for permabias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a permaflat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to permabias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))
386 def testDatasetLocations(self):
387 """Tests for `Registry.insertDatasetLocations`,
388 `Registry.getDatasetLocations`, and `Registry.removeDatasetLocations`.
389 """
390 registry = self.makeRegistry()
391 self.loadData(registry, "base.yaml")
392 self.loadData(registry, "datasets.yaml")
393 run = "imported_g"
394 ref = registry.findDataset("permabias", dataId={"instrument": "Cam1", "detector": 1}, collections=run)
395 ref2 = registry.findDataset("permaflat",
396 dataId={"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-G"},
397 collections=run)
398 datastoreName = "dummystore"
399 datastoreName2 = "dummystore2"
400 # Test adding information about a new dataset
401 registry.insertDatasetLocations(datastoreName, [ref])
402 addresses = registry.getDatasetLocations(ref)
403 self.assertIn(datastoreName, addresses)
404 self.assertEqual(len(addresses), 1)
405 registry.insertDatasetLocations(datastoreName2, [ref, ref2])
406 addresses = registry.getDatasetLocations(ref)
407 self.assertEqual(len(addresses), 2)
408 self.assertIn(datastoreName, addresses)
409 self.assertIn(datastoreName2, addresses)
410 registry.removeDatasetLocation(datastoreName, ref)
411 addresses = registry.getDatasetLocations(ref)
412 self.assertEqual(len(addresses), 1)
413 self.assertNotIn(datastoreName, addresses)
414 self.assertIn(datastoreName2, addresses)
415 with self.assertRaises(OrphanedRecordError):
416 registry.removeDataset(ref)
417 registry.removeDatasetLocation(datastoreName2, ref)
418 addresses = registry.getDatasetLocations(ref)
419 self.assertEqual(len(addresses), 0)
420 self.assertNotIn(datastoreName2, addresses)
421 registry.removeDataset(ref) # should not raise
422 addresses = registry.getDatasetLocations(ref2)
423 self.assertEqual(len(addresses), 1)
424 self.assertIn(datastoreName2, addresses)
426 def testBasicTransaction(self):
427 """Test that all operations within a single transaction block are
428 rolled back if an exception propagates out of the block.
429 """
430 registry = self.makeRegistry()
431 storageClass = StorageClass("testDatasetType")
432 registry.storageClasses.registerStorageClass(storageClass)
433 dimensions = registry.dimensions.extract(("instrument",))
434 dataId = {"instrument": "DummyCam"}
435 datasetTypeA = DatasetType(name="A",
436 dimensions=dimensions,
437 storageClass=storageClass)
438 datasetTypeB = DatasetType(name="B",
439 dimensions=dimensions,
440 storageClass=storageClass)
441 datasetTypeC = DatasetType(name="C",
442 dimensions=dimensions,
443 storageClass=storageClass)
444 run = "test"
445 registry.registerRun(run)
446 refId = None
447 with registry.transaction():
448 registry.registerDatasetType(datasetTypeA)
449 with self.assertRaises(ValueError):
450 with registry.transaction():
451 registry.registerDatasetType(datasetTypeB)
452 registry.registerDatasetType(datasetTypeC)
453 registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
454 ref, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run)
455 refId = ref.id
456 raise ValueError("Oops, something went wrong")
457 # A should exist
458 self.assertEqual(registry.getDatasetType("A"), datasetTypeA)
459 # But B and C should both not exist
460 with self.assertRaises(KeyError):
461 registry.getDatasetType("B")
462 with self.assertRaises(KeyError):
463 registry.getDatasetType("C")
464 # And neither should the dataset
465 self.assertIsNotNone(refId)
466 self.assertIsNone(registry.getDataset(refId))
467 # Or the Dimension entries
468 with self.assertRaises(LookupError):
469 registry.expandDataId({"instrument": "DummyCam"})
471 def testNestedTransaction(self):
472 """Test that operations within a transaction block are not rolled back
473 if an exception propagates out of an inner transaction block and is
474 then caught.
475 """
476 registry = self.makeRegistry()
477 dimension = registry.dimensions["instrument"]
478 dataId1 = {"instrument": "DummyCam"}
479 dataId2 = {"instrument": "DummyCam2"}
480 checkpointReached = False
481 with registry.transaction():
482 # This should be added and (ultimately) committed.
483 registry.insertDimensionData(dimension, dataId1)
484 with self.assertRaises(sqlalchemy.exc.IntegrityError):
485 with registry.transaction():
486 # This does not conflict, and should succeed (but not
487 # be committed).
488 registry.insertDimensionData(dimension, dataId2)
489 checkpointReached = True
490 # This should conflict and raise, triggerring a rollback
491 # of the previous insertion within the same transaction
492 # context, but not the original insertion in the outer
493 # block.
494 registry.insertDimensionData(dimension, dataId1)
495 self.assertTrue(checkpointReached)
496 self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
497 with self.assertRaises(LookupError):
498 registry.expandDataId(dataId2, graph=dimension.graph)
    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()
        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        # Five detectors, ids 1-5.
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        # Three visits.  NOTE(review): visit 20 is named "twelve" — looks
        # like a typo in the test data, but nothing below depends on it.
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        # Two exposures per visit.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", visit=20, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", visit=20, physical_filter="dummy_r"),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
        # The union of the dimensions of both dataset types.
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = list(registry.queryDimensions("visit", datasets=rawType, collections=run1, expand=True))
        rowsI = list(registry.queryDimensions(["visit"], datasets=rawType, collections=run1, expand=True))
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1, expand=True))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
            # Packers built on different dimension sets must round-trip the
            # same data ID but produce different packed integers.
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
        # second collection
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=tagged2))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
        # with two input datasets
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=[run1, tagged2]))
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
        # limit to single visit
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
        # more limiting expression, using link names instead of Table.column
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10 and detector > 1"))
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))
        # expression excludes everything
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit > 1000"))
        self.assertEqual(len(rows), 0)
        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="physical_filter = 'dummy_r'"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument.

        Inserts 10 tracts with 10 patches each, but only populates datasets
        for tracts (1, 3, 5) and patches (2, 4, 6, 7), then checks that
        ``queryDimensions`` returns exactly the populated combinations under
        various ``where`` expressions.
        """
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "abstract_filter" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        # 10 tracts, each with 10 patches (cell coordinates are irrelevant
        # here, so both are zero).
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets; only a subset of the inserted tracts and
        # patches is populated, and measType intentionally gets no datasets.
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run))
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="tract IN (1, 5) AND patch IN (2, 7)"))
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="abstract_filter = 'i'"))
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

        # expression excludes everything, specifying non-existing skymap is
        # not a fatal error, it's operator error
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="skymap = 'Mars'"))
        self.assertEqual(len(rows), 0)
746 def testSpatialMatch(self):
747 """Test involving spatial match using join tables.
749 Note that realistic test needs a reasonably-defined skypix and regions
750 in registry tables which is hard to implement in this simple test.
751 So we do not actually fill registry with any data and all queries will
752 return empty result, but this is still useful for coverage of the code
753 that generates query.
754 """
755 registry = self.makeRegistry()
757 # dataset types
758 collection = "test"
759 registry.registerRun(name=collection)
760 storageClass = StorageClass("testDataset")
761 registry.storageClasses.registerStorageClass(storageClass)
763 calexpType = DatasetType(name="CALEXP",
764 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
765 storageClass=storageClass)
766 registry.registerDatasetType(calexpType)
768 coaddType = DatasetType(name="deepCoadd_calexp",
769 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
770 "abstract_filter")),
771 storageClass=storageClass)
772 registry.registerDatasetType(coaddType)
774 dimensions = DimensionGraph(
775 registry.dimensions,
776 dimensions=(calexpType.dimensions.required | coaddType.dimensions.required)
777 )
779 # without data this should run OK but return empty set
780 rows = list(registry.queryDimensions(dimensions, datasets=calexpType, collections=collection))
781 self.assertEqual(len(rows), 0)
    def testCalibrationLabelIndirection(self):
        """Test that we can look up datasets with calibration_label dimensions
        from a data ID with exposure dimensions.

        Each calibration_label record carries a validity time range; the
        fixture below expects the registry to match each exposure to the
        flats whose label's range contains the exposure's observation times.
        """

        def _dt(iso_string: str) -> astropy.time.Time:
            # Shorthand for constructing the UTC timestamps used below.
            return astropy.time.Time(iso_string, format="iso", scale="utc")

        registry = self.makeRegistry()

        # A flat is identified by instrument+detector+filter plus a
        # calibration_label naming its validity range.
        flat = DatasetType(
            "flat",
            registry.dimensions.extract(
                ["instrument", "detector", "physical_filter", "calibration_label"]
            ),
            "ImageU"
        )
        registry.registerDatasetType(flat)
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
        )
        # One exposure per night (Dec 15 and Dec 16); their time ranges
        # determine which calibration_label each exposure matches.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-15 02:00:00"), datetime_end=_dt("2005-12-15 03:00:00")),
            dict(instrument="DummyCam", id=101, name="101", visit=11, physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-16 02:00:00"), datetime_end=_dt("2005-12-16 03:00:00")),
        )
        # Three validity ranges: one per night, plus one spanning both.
        registry.insertDimensionData(
            "calibration_label",
            dict(instrument="DummyCam", name="first_night",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-15 04:00:00")),
            dict(instrument="DummyCam", name="second_night",
                 datetime_begin=_dt("2005-12-16 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
            dict(instrument="DummyCam", name="both_nights",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
        )
        # Different flats for different nights for detectors 1-3 in first
        # collection.
        run1 = "calibs1"
        registry.registerRun(run1)
        for detector in (1, 2, 3):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
        # The same flat for both nights for detectors 3-5 (so detector 3 has
        # multiple valid flats) in second collection.
        run2 = "calib2"
        registry.registerRun(run2)
        for detector in (3, 4, 5):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run2)
        # Perform queries for individual exposure+detector combinations, which
        # should always return exactly one flat.
        for exposure in (100, 101):
            # run1: per-night flats, so exactly one is valid per exposure.
            for detector in (1, 2, 3):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = list(registry.queryDatasets("flat", collections=[run1],
                                                       instrument="DummyCam",
                                                       exposure=exposure,
                                                       detector=detector))
                    self.assertEqual(len(rows), 1)
            # run2: a single flat valid for both nights.
            for detector in (3, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Both collections: these detectors have a valid flat in only one
            # of the two runs, so still exactly one result.
            for detector in (1, 2, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Detector 3 has a valid flat in both runs, so two results.
            for detector in (3,):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 2)
882 def testAbstractFilterQuery(self):
883 """Test that we can run a query that just lists the known
884 abstract_filters. This is tricky because abstract_filter is
885 backed by a query against physical_filter.
886 """
887 registry = self.makeRegistry()
888 registry.insertDimensionData("instrument", dict(name="DummyCam"))
889 registry.insertDimensionData(
890 "physical_filter",
891 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
892 dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
893 dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
894 )
895 rows = list(registry.queryDimensions(["abstract_filter"]))
896 self.assertCountEqual(
897 rows,
898 [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions),
899 DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)]
900 )