# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from datetime import datetime
import os

import sqlalchemy

from ...core import (
    DataCoordinate,
    DatasetType,
    DimensionGraph,
    StorageClass,
    ddl,
    YamlRepoImportBackend,
)
from .._registry import Registry, CollectionType, ConflictingDefinitionError, OrphanedRecordError
from ..wildcards import DatasetTypeRestriction


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
            backend.register()
            backend.load(datastore=None)

    def assertRowCount(self, registry: Registry, table: str, count: int):
        """Check the number of rows in table.
        """
        # TODO: all tests that rely on this method should be rewritten, as it
        # needs to depend on Registry implementation details to have any
        # chance of working.
        sql = sqlalchemy.sql.select(
            [sqlalchemy.sql.func.count()]
        ).select_from(
            getattr(registry._tables, table)
        )
        self.assertEqual(registry._db.query(sql).scalar(), count)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
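        # fetchOpaqueData with no extra arguments returns all rows; keyword
        # arguments are equality constraints on the corresponding columns,
        # combined with AND.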
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-registering the identical dataset type should be a no-op,
        # returning False
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when the new definition is not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData` and
        `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # abstract_filter doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
        # A missing required dependency ("instrument") should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("permabias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDataset(ref)
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testComponents(self):
        """Tests for `Registry.attachComponent` and other dataset operations
        on composite datasets.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        parentDatasetType = registry.getDatasetType("permabias")
        childDatasetType1 = registry.getDatasetType("permabias.image")
        childDatasetType2 = registry.getDatasetType("permabias.mask")
        dataId = {"instrument": "Cam1", "detector": 2}
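        # Insert the parent and both components with the same data ID, then
        # attach each component to the parent under its component name.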
        parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run)
        children = {"image": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0],
                    "mask": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]}
        for name, child in children.items():
            registry.attachComponent(name, parent, child)
        self.assertEqual(parent.components, children)
        outParent = registry.getDataset(parent.id)
        self.assertEqual(outParent.components, children)
        # Remove the parent; this should remove all children.
        registry.removeDataset(parent)
        self.assertIsNone(registry.findDataset(parentDatasetType, dataId, collections=[run]))
        self.assertIsNone(registry.findDataset(childDatasetType1, dataId, collections=[run]))
        self.assertIsNone(registry.findDataset(childDatasetType2, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("permabias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "permabias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches:
        # 1. 'tag1'
        # 2. 'run1', but only for the permaflat dataset
        # 3. 'run2'
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [(tag1, DatasetTypeRestriction.any),
             (run1, DatasetTypeRestriction.fromExpression("permaflat")),
             (run2, DatasetTypeRestriction.any)]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for permabias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. not look in run1, because that entry is restricted to permaflat
        # 3. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Look in the chain for a permaflat that is in run1; should get the
        # same ref as if we'd searched run1 directly.
        dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
        self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                         registry.findDataset("permaflat", dataId3, collections=run1))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
        # Searching for permabias with dataId1 should find it via tag1 in
        # chain2, recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Searching for permabias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a permaflat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to permabias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))

    def testDatasetLocations(self):
        """Tests for `Registry.insertDatasetLocations`,
        `Registry.getDatasetLocations`, and `Registry.removeDatasetLocation`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run = "imported_g"
        ref = registry.findDataset("permabias", dataId={"instrument": "Cam1", "detector": 1}, collections=run)
        ref2 = registry.findDataset("permaflat",
                                    dataId={"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-G"},
                                    collections=run)
        datastoreName = "dummystore"
        datastoreName2 = "dummystore2"
        # Test adding information about a new dataset
        registry.insertDatasetLocations(datastoreName, [ref])
        addresses = registry.getDatasetLocations(ref)
        self.assertIn(datastoreName, addresses)
        self.assertEqual(len(addresses), 1)
        registry.insertDatasetLocations(datastoreName2, [ref, ref2])
        addresses = registry.getDatasetLocations(ref)
        self.assertEqual(len(addresses), 2)
        self.assertIn(datastoreName, addresses)
        self.assertIn(datastoreName2, addresses)
        registry.removeDatasetLocation(datastoreName, ref)
        addresses = registry.getDatasetLocations(ref)
        self.assertEqual(len(addresses), 1)
        self.assertNotIn(datastoreName, addresses)
        self.assertIn(datastoreName2, addresses)
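        # While any datastore still holds a location for the dataset,
        # removing it from the Registry should raise rather than orphan
        # that record.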
        with self.assertRaises(OrphanedRecordError):
            registry.removeDataset(ref)
        registry.removeDatasetLocation(datastoreName2, ref)
        addresses = registry.getDatasetLocations(ref)
        self.assertEqual(len(addresses), 0)
        self.assertNotIn(datastoreName2, addresses)
        registry.removeDataset(ref)  # should not raise
        addresses = registry.getDatasetLocations(ref2)
        self.assertEqual(len(addresses), 1)
        self.assertIn(datastoreName2, addresses)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument",))
        dataId = {"instrument": "DummyCam"}
        datasetTypeA = DatasetType(name="A",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        datasetTypeB = DatasetType(name="B",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        datasetTypeC = DatasetType(name="C",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        run = "test"
        registry.registerRun(run)
        refId = None
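        # Everything in the outer transaction block should be committed;
        # everything in the inner block should be rolled back when the
        # ValueError propagates out of it.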
        with registry.transaction():
            registry.registerDatasetType(datasetTypeA)
            with self.assertRaises(ValueError):
                with registry.transaction():
                    registry.registerDatasetType(datasetTypeB)
                    registry.registerDatasetType(datasetTypeC)
                    registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
                    ref, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run)
                    refId = ref.id
                    raise ValueError("Oops, something went wrong")
        # A should exist
        self.assertEqual(registry.getDatasetType("A"), datasetTypeA)
        # But B and C should both not exist
        with self.assertRaises(KeyError):
            registry.getDatasetType("B")
        with self.assertRaises(KeyError):
            registry.getDatasetType("C")
        # And neither should the dataset
        self.assertIsNotNone(refId)
        self.assertIsNone(registry.getDataset(refId))
        # Or the Dimension entries
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "DummyCam"})

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction():
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins
        to skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", visit=20, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", visit=20, physical_filter="dummy_r"),
        )
        # dataset types and collections
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2:
                # 100 has different datasets in the two collections,
                # 101 has the same dataset in both.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])
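
        # The query dimensions are the union of the required dimensions of
        # both dataset types: instrument, exposure, detector, and visit.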
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension name (str) works as well as a list of str
        rows = list(registry.queryDimensions("visit", datasets=rawType, collections=run1, expand=True))
        rowsI = list(registry.queryDimensions(["visit"], datasets=rawType, collections=run1, expand=True))
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1, expand=True))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=tagged2))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=[run1, tagged2]))
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit = 10 and detector > 1"))
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression that excludes everything
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="visit > 1000"))
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, which is not in the requested
        # dimensions but is part of the full dimension graph, should work too.
        rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
                                             where="physical_filter = 'dummy_r'"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for this test; we want
        # "abstract_filter" in the test, so we also have to add
        # physical_filter dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)
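
        # Query over the union of all three dataset types' required
        # dimensions: skymap, tract, patch, and abstract_filter.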
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run))
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="tract IN (1, 5) AND patch IN (2, 7)"))
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="abstract_filter = 'i'"))
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

        # An expression that excludes everything: specifying a non-existent
        # skymap is not a fatal error, just an operator error that returns
        # no results.
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="skymap = 'Mars'"))
        self.assertEqual(len(rows), 0)

    def testSpatialMatch(self):
        """Test involving a spatial match using join tables.

        Note that a realistic test needs reasonably-defined skypix and
        regions in the registry tables, which is hard to implement in this
        simple test.  So we do not actually fill the registry with any data,
        and all queries will return an empty result, but this is still useful
        for coverage of the code that generates the query.
        """
        registry = self.makeRegistry()

        # dataset types
        collection = "test"
        registry.registerRun(name=collection)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)

        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit",
                                                                         "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        coaddType = DatasetType(name="deepCoadd_calexp",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                        "abstract_filter")),
                                storageClass=storageClass)
        registry.registerDatasetType(coaddType)
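
        # The union of the two dataset types' dimensions mixes instrument and
        # skymap dimensions, so the generated query has to relate them
        # spatially via the join tables.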
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | coaddType.dimensions.required)
        )

        # without data this should run OK but return an empty set
        rows = list(registry.queryDimensions(dimensions, datasets=calexpType, collections=collection))
        self.assertEqual(len(rows), 0)

    def testCalibrationLabelIndirection(self):
        """Test that we can look up datasets with calibration_label dimensions
        from a data ID with exposure dimensions.
        """
        registry = self.makeRegistry()
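
        # A calibration_label record carries a datetime validity range; a
        # flat can be looked up for an exposure because the exposure's
        # observation times fall inside that range.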
        flat = DatasetType(
            "flat",
            registry.dimensions.extract(
                ["instrument", "detector", "physical_filter", "calibration_label"]
            ),
            "ImageU"
        )
        registry.registerDatasetType(flat)
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i",
                 datetime_begin=datetime(2005, 12, 15, 2), datetime_end=datetime(2005, 12, 15, 3)),
            dict(instrument="DummyCam", id=101, name="101", visit=11, physical_filter="dummy_i",
                 datetime_begin=datetime(2005, 12, 16, 2), datetime_end=datetime(2005, 12, 16, 3)),
        )
        registry.insertDimensionData(
            "calibration_label",
            dict(instrument="DummyCam", name="first_night",
                 datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 15, 4)),
            dict(instrument="DummyCam", name="second_night",
                 datetime_begin=datetime(2005, 12, 16, 1), datetime_end=datetime(2005, 12, 16, 4)),
            dict(instrument="DummyCam", name="both_nights",
                 datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 16, 4)),
        )
        # Different flats for different nights for detectors 1-3 in the first
        # collection.
        run1 = "calibs1"
        registry.registerRun(run1)
        for detector in (1, 2, 3):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
        # The same flat for both nights for detectors 3-5 (so detector 3 has
        # multiple valid flats) in the second collection.
        run2 = "calib2"
        registry.registerRun(run2)
        for detector in (3, 4, 5):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run2)
        # Perform queries for individual exposure+detector combinations, which
        # should always return exactly one flat.
        for exposure in (100, 101):
            for detector in (1, 2, 3):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = list(registry.queryDatasets("flat", collections=[run1],
                                                       instrument="DummyCam",
                                                       exposure=exposure,
                                                       detector=detector))
                    self.assertEqual(len(rows), 1)
            for detector in (3, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            for detector in (1, 2, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            for detector in (3,):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 2)

    def testAbstractFilterQuery(self):
        """Test that we can run a query that just lists the known
        abstract_filters.  This is tricky because abstract_filter is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
            dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
        )
        rows = list(registry.queryDimensions(["abstract_filter"]))
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions),
             DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)]
        )