Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 4%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTests"]
25from abc import ABC, abstractmethod
26from datetime import datetime
28import sqlalchemy
30from ...core import (
31 DataCoordinate,
32 DatasetType,
33 DimensionGraph,
34 StorageClass,
35 ddl,
36)
37from .._registry import Registry, ConflictingDefinitionError, OrphanedRecordError
40class RegistryTests(ABC):
41 """Generic tests for the `Registry` class that can be subclassed to
42 generate tests for different configurations.
43 """
    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return a fresh `Registry` instance to be tested.

        Subclasses must override this to construct a registry backed by
        their particular database/configuration; every test method calls
        it once to get an isolated registry.
        """
        raise NotImplementedError()
49 def assertRowCount(self, registry: Registry, table: str, count: int):
50 """Check the number of rows in table.
51 """
52 # TODO: all tests that rely on this method should be rewritten, as it
53 # needs to depend on Registry implementation details to have any chance
54 # of working.
55 sql = sqlalchemy.sql.select(
56 [sqlalchemy.sql.func.count()]
57 ).select_from(
58 getattr(registry._tables, table)
59 )
60 self.assertEqual(registry._db.query(sql).scalar(), count)
62 def testOpaque(self):
63 """Tests for `Registry.registerOpaqueTable`,
64 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
65 `Registry.deleteOpaqueData`.
66 """
67 registry = self.makeRegistry()
68 table = "opaque_table_for_testing"
69 registry.registerOpaqueTable(
70 table,
71 spec=ddl.TableSpec(
72 fields=[
73 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
74 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
75 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
76 ],
77 )
78 )
79 rows = [
80 {"id": 1, "name": "one", "count": None},
81 {"id": 2, "name": "two", "count": 5},
82 {"id": 3, "name": "three", "count": 6},
83 ]
84 registry.insertOpaqueData(table, *rows)
85 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
86 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
87 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
88 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
89 registry.deleteOpaqueData(table, id=3)
90 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
91 registry.deleteOpaqueData(table)
92 self.assertEqual([], list(registry.fetchOpaqueData(table)))
94 def testDatasetType(self):
95 """Tests for `Registry.registerDatasetType` and
96 `Registry.getDatasetType`.
97 """
98 registry = self.makeRegistry()
99 # Check valid insert
100 datasetTypeName = "test"
101 storageClass = StorageClass("testDatasetType")
102 registry.storageClasses.registerStorageClass(storageClass)
103 dimensions = registry.dimensions.extract(("instrument", "visit"))
104 differentDimensions = registry.dimensions.extract(("instrument", "patch"))
105 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
106 # Inserting for the first time should return True
107 self.assertTrue(registry.registerDatasetType(inDatasetType))
108 outDatasetType1 = registry.getDatasetType(datasetTypeName)
109 self.assertEqual(outDatasetType1, inDatasetType)
111 # Re-inserting should work
112 self.assertFalse(registry.registerDatasetType(inDatasetType))
113 # Except when they are not identical
114 with self.assertRaises(ConflictingDefinitionError):
115 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
116 registry.registerDatasetType(nonIdenticalDatasetType)
118 # Template can be None
119 datasetTypeName = "testNoneTemplate"
120 storageClass = StorageClass("testDatasetType2")
121 registry.storageClasses.registerStorageClass(storageClass)
122 dimensions = registry.dimensions.extract(("instrument", "visit"))
123 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
124 registry.registerDatasetType(inDatasetType)
125 outDatasetType2 = registry.getDatasetType(datasetTypeName)
126 self.assertEqual(outDatasetType2, inDatasetType)
128 allTypes = registry.getAllDatasetTypes()
129 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
    def testDimensions(self):
        """Tests for `Registry.insertDimensionData` and
        `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail with a database-level
        # integrity error (duplicate primary key).
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # abstract_filter doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
175 def testDataset(self):
176 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
177 and `Registry.removeDataset`.
178 """
179 registry = self.makeRegistry()
180 run = "test"
181 registry.registerRun(run)
182 storageClass = StorageClass("testDataset")
183 registry.storageClasses.registerStorageClass(storageClass)
184 datasetType = DatasetType(name="testtype", dimensions=registry.dimensions.extract(("instrument",)),
185 storageClass=storageClass)
186 registry.registerDatasetType(datasetType)
187 dataId = {"instrument": "DummyCam"}
188 registry.insertDimensionData("instrument", dataId)
189 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
190 outRef = registry.getDataset(ref.id)
191 self.assertIsNotNone(ref.id)
192 self.assertEqual(ref, outRef)
193 with self.assertRaises(ConflictingDefinitionError):
194 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
195 registry.removeDataset(ref)
196 self.assertIsNone(registry.find(run, datasetType, dataId))
    def testComponents(self):
        """Tests for `Registry.attachComponent` and other dataset operations
        on composite datasets.
        """
        registry = self.makeRegistry()
        childStorageClass = StorageClass("testComponentsChild")
        registry.storageClasses.registerStorageClass(childStorageClass)
        parentStorageClass = StorageClass("testComponentsParent",
                                          components={"child1": childStorageClass,
                                                      "child2": childStorageClass})
        registry.storageClasses.registerStorageClass(parentStorageClass)
        parentDatasetType = DatasetType(name="parent",
                                        dimensions=registry.dimensions.extract(("instrument",)),
                                        storageClass=parentStorageClass)
        childDatasetType1 = DatasetType(name="parent.child1",
                                        dimensions=registry.dimensions.extract(("instrument",)),
                                        storageClass=childStorageClass)
        childDatasetType2 = DatasetType(name="parent.child2",
                                        dimensions=registry.dimensions.extract(("instrument",)),
                                        storageClass=childStorageClass)
        registry.registerDatasetType(parentDatasetType)
        registry.registerDatasetType(childDatasetType1)
        registry.registerDatasetType(childDatasetType2)
        dataId = {"instrument": "DummyCam"}
        registry.insertDimensionData("instrument", dataId)
        run = "test"
        registry.registerRun(run)
        parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run)
        children = {"child1": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0],
                    "child2": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]}
        # Attaching components should update the in-memory parent ref...
        for name, child in children.items():
            registry.attachComponent(name, parent, child)
        self.assertEqual(parent.components, children)
        # ...and the attachment must also be visible when the parent is
        # re-fetched from the registry.
        outParent = registry.getDataset(parent.id)
        self.assertEqual(outParent.components, children)
        # Remove the parent; this should remove both children.
        registry.removeDataset(parent)
        self.assertIsNone(registry.find(run, parentDatasetType, dataId))
        self.assertIsNone(registry.find(run, childDatasetType1, dataId))
        self.assertIsNone(registry.find(run, childDatasetType2, dataId))
    def testFind(self):
        """Tests for `Registry.find`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testFind")
        registry.storageClasses.registerStorageClass(storageClass)
        datasetType = DatasetType(name="dummytype",
                                  dimensions=registry.dimensions.extract(("instrument", "visit")),
                                  storageClass=storageClass)
        registry.registerDatasetType(datasetType)
        # Two instruments with one filter each; visit 2 exists for both
        # instruments so we can check that `find` keys on the full data ID,
        # not just the visit number.
        registry.insertDimensionData("instrument",
                                     {"instrument": "DummyCam"},
                                     {"instrument": "MyCam"})
        registry.insertDimensionData("physical_filter",
                                     {"instrument": "DummyCam", "physical_filter": "d-r",
                                      "abstract_filter": "r"},
                                     {"instrument": "MyCam", "physical_filter": "m-r",
                                      "abstract_filter": "r"})
        registry.insertDimensionData("visit",
                                     {"instrument": "DummyCam", "id": 0, "name": "zero",
                                      "physical_filter": "d-r"},
                                     {"instrument": "DummyCam", "id": 1, "name": "one",
                                      "physical_filter": "d-r"},
                                     {"instrument": "DummyCam", "id": 2, "name": "two",
                                      "physical_filter": "d-r"},
                                     {"instrument": "MyCam", "id": 2, "name": "two",
                                      "physical_filter": "m-r"})
        run = "test"
        dataId = {"instrument": "DummyCam", "visit": 0, "physical_filter": "d-r", "abstract_filter": None}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.find(run, datasetType, dataId)
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "DummyCam", "abstract_filter": "g"}  # should be visit
            registry.find(run, datasetType, dataId)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "DummyCam", "visit": 1, "physical_filter": "d-r", "abstract_filter": None}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "DummyCam", "visit": 2, "physical_filter": "d-r", "abstract_filter": None}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        dataId3 = {"instrument": "MyCam", "visit": 2, "physical_filter": "m-r", "abstract_filter": None}
        inputRef3, = registry.insertDatasets(datasetType, dataIds=[dataId3], run=run)
        self.assertEqual(registry.find(run, datasetType, dataId1), inputRef1)
        self.assertEqual(registry.find(run, datasetType, dataId2), inputRef2)
        self.assertEqual(registry.find(run, datasetType, dataId3), inputRef3)
        self.assertNotEqual(registry.find(run, datasetType, dataId1), inputRef2)
        self.assertNotEqual(registry.find(run, datasetType, dataId2), inputRef1)
        self.assertNotEqual(registry.find(run, datasetType, dataId3), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "DummyCam", "visit": 42}
        self.assertIsNone(registry.find(run, datasetType, nonExistingDataId))
    def testCollections(self):
        """Tests for `Registry.getAllCollections`, `Registry.registerRun`,
        `Registry.disassociate`, and interactions between collections and
        `Registry.find`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testCollections")
        registry.storageClasses.registerStorageClass(storageClass)
        datasetType = DatasetType(name="dummytype",
                                  dimensions=registry.dimensions.extract(("instrument", "visit")),
                                  storageClass=storageClass)
        registry.registerDatasetType(datasetType)
        registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
        registry.insertDimensionData("physical_filter", {"instrument": "DummyCam", "physical_filter": "d-r",
                                                         "abstract_filter": "R"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 0, "name": "zero",
                                               "physical_filter": "d-r"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 1, "name": "one",
                                               "physical_filter": "d-r"})
        run = "ingest"
        registry.registerRun(run)
        # Dataset.physical_filter should be populated as well here from the
        # visit Dimension values.
        dataId1 = {"instrument": "DummyCam", "visit": 0}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "DummyCam", "visit": 1}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        # We should be able to find both datasets in their run
        outputRef = registry.find(run, datasetType, dataId1)
        self.assertEqual(outputRef, inputRef1)
        outputRef = registry.find(run, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # and with the associated collection
        newCollection = "something"
        registry.associate(newCollection, [inputRef1, inputRef2])
        outputRef = registry.find(newCollection, datasetType, dataId1)
        self.assertEqual(outputRef, inputRef1)
        outputRef = registry.find(newCollection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # but no more after disassociation; the other dataset in the
        # collection must remain findable.
        registry.disassociate(newCollection, [inputRef1, ])
        self.assertIsNone(registry.find(newCollection, datasetType, dataId1))
        outputRef = registry.find(newCollection, datasetType, dataId2)
        self.assertEqual(outputRef, inputRef2)
        # Both the run and the explicitly-created collection are reported.
        collections = registry.getAllCollections()
        self.assertEqual(collections, {"something", "ingest"})
    def testAssociate(self):
        """Tests for `Registry.associate`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testAssociate")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        datasetType1 = DatasetType(name="dummytype", dimensions=dimensions, storageClass=storageClass)
        registry.registerDatasetType(datasetType1)
        datasetType2 = DatasetType(name="smartytype", dimensions=dimensions, storageClass=storageClass)
        registry.registerDatasetType(datasetType2)
        registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
        registry.insertDimensionData("physical_filter", {"instrument": "DummyCam", "physical_filter": "d-r",
                                                         "abstract_filter": "R"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 0, "name": "zero",
                                               "physical_filter": "d-r"})
        registry.insertDimensionData("visit", {"instrument": "DummyCam", "id": 1, "name": "one",
                                               "physical_filter": "d-r"})
        run1 = "ingest1"
        registry.registerRun(run1)
        run2 = "ingest2"
        registry.registerRun(run2)
        run3 = "ingest3"
        registry.registerRun(run3)
        # Dataset.physical_filter should be populated as well here
        # from the visit Dimension values.
        dataId1 = {"instrument": "DummyCam", "visit": 0}
        dataId2 = {"instrument": "DummyCam", "visit": 1}
        ref1_run1, ref2_run1 = registry.insertDatasets(datasetType1, dataIds=[dataId1, dataId2], run=run1)
        ref1_run2, ref2_run2 = registry.insertDatasets(datasetType2, dataIds=[dataId1, dataId2], run=run2)
        ref1_run3, ref2_run3 = registry.insertDatasets(datasetType2, dataIds=[dataId1, dataId2], run=run3)
        # Every returned ref should carry expanded dimension records.
        for ref in (ref1_run1, ref2_run1, ref1_run2, ref2_run2, ref1_run3, ref2_run3):
            self.assertEqual(ref.dataId.records["visit"].physical_filter, "d-r")
            self.assertEqual(ref.dataId.records["physical_filter"].abstract_filter, "R")
        # should have exactly 6 rows in Dataset (2 data IDs x 3 runs)
        self.assertRowCount(registry, "dataset", 6)
        self.assertRowCount(registry, "dataset_collection", 6)
        # adding same DatasetRef to the same run is an error
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType1, dataIds=[dataId2], run=run1)
        # above exception must rollback and not add anything to Dataset
        self.assertRowCount(registry, "dataset", 6)
        self.assertRowCount(registry, "dataset_collection", 6)
        # associated refs from run1 with some other collection
        newCollection = "something"
        registry.associate(newCollection, [ref1_run1, ref2_run1])
        self.assertRowCount(registry, "dataset_collection", 8)
        # associating same exact DatasetRef is OK (not doing anything),
        # two cases to test - single-ref and many-refs
        registry.associate(newCollection, [ref1_run1])
        registry.associate(newCollection, [ref1_run1, ref2_run1])
        self.assertRowCount(registry, "dataset_collection", 8)
        # associated refs from run2 with same other collection, this should
        # be OK because they have different dataset type
        registry.associate(newCollection, [ref1_run2, ref2_run2])
        self.assertRowCount(registry, "dataset_collection", 10)
        # associating DatasetRef with the same units but different ID is not OK
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(newCollection, [ref1_run3])
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(newCollection, [ref1_run3, ref2_run3])
    def testDatasetLocations(self):
        """Tests for `Registry.insertDatasetLocations`,
        `Registry.getDatasetLocations`, and `Registry.removeDatasetLocation`.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testStorageInfo")
        registry.storageClasses.registerStorageClass(storageClass)
        datasetType = DatasetType(name="test", dimensions=registry.dimensions.extract(("instrument",)),
                                  storageClass=storageClass)
        datasetType2 = DatasetType(name="test2", dimensions=registry.dimensions.extract(("instrument",)),
                                   storageClass=storageClass)
        registry.registerDatasetType(datasetType)
        registry.registerDatasetType(datasetType2)
        registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
        run = "test"
        registry.registerRun(run)
        ref, = registry.insertDatasets(datasetType, dataIds=[{"instrument": "DummyCam"}], run=run)
        ref2, = registry.insertDatasets(datasetType2, dataIds=[{"instrument": "DummyCam"}], run=run)
        datastoreName = "dummystore"
        datastoreName2 = "dummystore2"
        # Test adding information about a new dataset
        registry.insertDatasetLocations(datastoreName, [ref])
        addresses = registry.getDatasetLocations(ref)
        self.assertIn(datastoreName, addresses)
        self.assertEqual(len(addresses), 1)
        # A dataset may be present in more than one datastore.
        registry.insertDatasetLocations(datastoreName2, [ref, ref2])
        addresses = registry.getDatasetLocations(ref)
        self.assertEqual(len(addresses), 2)
        self.assertIn(datastoreName, addresses)
        self.assertIn(datastoreName2, addresses)
        registry.removeDatasetLocation(datastoreName, ref)
        addresses = registry.getDatasetLocations(ref)
        self.assertEqual(len(addresses), 1)
        self.assertNotIn(datastoreName, addresses)
        self.assertIn(datastoreName2, addresses)
        # A dataset that still has a location cannot be removed.
        with self.assertRaises(OrphanedRecordError):
            registry.removeDataset(ref)
        registry.removeDatasetLocation(datastoreName2, ref)
        addresses = registry.getDatasetLocations(ref)
        self.assertEqual(len(addresses), 0)
        self.assertNotIn(datastoreName2, addresses)
        registry.removeDataset(ref)  # should not raise
        # The second dataset's location must be unaffected throughout.
        addresses = registry.getDatasetLocations(ref2)
        self.assertEqual(len(addresses), 1)
        self.assertIn(datastoreName2, addresses)
    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument",))
        dataId = {"instrument": "DummyCam"}
        datasetTypeA = DatasetType(name="A",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        datasetTypeB = DatasetType(name="B",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        datasetTypeC = DatasetType(name="C",
                                   dimensions=dimensions,
                                   storageClass=storageClass)
        run = "test"
        registry.registerRun(run)
        refId = None
        # The outer transaction commits; the inner one is aborted by the
        # ValueError raised at its end, so everything registered inside the
        # inner block must be rolled back.
        with registry.transaction():
            registry.registerDatasetType(datasetTypeA)
            with self.assertRaises(ValueError):
                with registry.transaction():
                    registry.registerDatasetType(datasetTypeB)
                    registry.registerDatasetType(datasetTypeC)
                    registry.insertDimensionData("instrument", {"instrument": "DummyCam"})
                    ref, = registry.insertDatasets(datasetTypeA, dataIds=[dataId], run=run)
                    refId = ref.id
                    raise ValueError("Oops, something went wrong")
        # A should exist
        self.assertEqual(registry.getDatasetType("A"), datasetTypeA)
        # But B and C should both not exist
        with self.assertRaises(KeyError):
            registry.getDatasetType("B")
        with self.assertRaises(KeyError):
            registry.getDatasetType("C")
        # And neither should the dataset
        self.assertIsNotNone(refId)
        self.assertIsNone(registry.getDataset(refId))
        # Or the Dimension entries
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "DummyCam"})
    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction():
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        # dataId1 survived (outer transaction committed)...
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        # ...but dataId2 was rolled back with the inner transaction.
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)
    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()
        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        # NOTE(review): visit 20 is named "twelve"; looks like a typo in the
        # test data but nothing below depends on visit names.
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
        )
        # Two exposures per visit.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", visit=10, physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", visit=11, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", visit=20, physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", visit=20, physical_filter="dummy_r"),
        )
        # dataset types
        run1 = "test"
        run2 = "test2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both collections, 100 has
                # different dataset_id in different collections, for 101 only
                # single dataset_id exists
                if exposure == 100:
                    registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure == 101:
                    registry.associate(run2, [ref])
        # Add pre-existing datasets to second collection.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
        # Union of the required dimensions of both dataset types.
        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = list(registry.queryDimensions("visit", datasets={rawType: [run1]}, expand=True))
        rowsI = list(registry.queryDimensions(["visit"], datasets={rawType: [run1]}, expand=True))
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]}, expand=True))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
            # Packers over different dimension subsets must round-trip and
            # must not collide with each other.
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
        # second collection
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run2]}))
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
        # with two input datasets
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1, run2]}))
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
        # limit to single visit
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="visit = 10"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
        # more limiting expression, using link names instead of Table.column
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="visit = 10 and detector > 1"))
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))
        # expression excludes everything
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="visit > 1000"))
        self.assertEqual(len(rows), 0)
        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = list(registry.queryDimensions(dimensions, datasets={rawType: [run1]},
                                             where="physical_filter = 'dummy_r'"))
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument.

        Inserts a skymap with 10 tracts of 10 patches each, registers three
        coadd-level dataset types, adds datasets for a subset of
        (tract, patch, filter) combinations, and checks that
        `Registry.queryDimensions` returns the expected data IDs with and
        without ``where`` expressions.
        """
        registry = self.makeRegistry()
        # need a bunch of dimensions and datasets for test, we want
        # "abstract_filter" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        # 10 tracts, each with patches 0-9 (cell coordinates unused here).
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        # measType is registered but never gets datasets; it only contributes
        # its dimensions to the query graph below.
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets: only tracts (1, 3, 5) and patches
        # (2, 4, 6, 7) are populated, with calexps for filters "i" and "r".
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = list(registry.queryDimensions(dimensions,
                                             datasets={calexpType: [run], mergeType: [run]}))
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = list(registry.queryDimensions(dimensions,
                                             datasets={calexpType: [run], mergeType: [run]},
                                             where="tract IN (1, 5) AND patch IN (2, 7)"))
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = list(registry.queryDimensions(dimensions,
                                             datasets={calexpType: [run], mergeType: [run]},
                                             where="abstract_filter = 'i'"))
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

        # expression excludes everything; naming a non-existing skymap is
        # not a fatal error, just operator error yielding an empty result
        rows = list(registry.queryDimensions(dimensions,
                                             datasets={calexpType: [run], mergeType: [run]},
                                             where="skymap = 'Mars'"))
        self.assertEqual(len(rows), 0)
765 def testSpatialMatch(self):
766 """Test involving spatial match using join tables.
768 Note that realistic test needs a reasonably-defined skypix and regions
769 in registry tables which is hard to implement in this simple test.
770 So we do not actually fill registry with any data and all queries will
771 return empty result, but this is still useful for coverage of the code
772 that generates query.
773 """
774 registry = self.makeRegistry()
776 # dataset types
777 collection = "test"
778 registry.registerRun(name=collection)
779 storageClass = StorageClass("testDataset")
780 registry.storageClasses.registerStorageClass(storageClass)
782 calexpType = DatasetType(name="CALEXP",
783 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
784 storageClass=storageClass)
785 registry.registerDatasetType(calexpType)
787 coaddType = DatasetType(name="deepCoadd_calexp",
788 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
789 "abstract_filter")),
790 storageClass=storageClass)
791 registry.registerDatasetType(coaddType)
793 dimensions = DimensionGraph(
794 registry.dimensions,
795 dimensions=(calexpType.dimensions.required | coaddType.dimensions.required)
796 )
798 # without data this should run OK but return empty set
799 rows = list(registry.queryDimensions(dimensions, datasets={calexpType: [collection]}))
800 self.assertEqual(len(rows), 0)
    def testCalibrationLabelIndirection(self):
        """Test that we can look up datasets with calibration_label dimensions
        from a data ID with exposure dimensions.

        Builds two exposures on consecutive nights, three calibration_label
        validity windows ("first_night", "second_night", "both_nights"), and
        flat datasets in two runs; then checks that queryDatasets resolves an
        (exposure, detector) data ID to the flat whose label's validity range
        contains the exposure's time range.
        """
        registry = self.makeRegistry()

        flat = DatasetType(
            "flat",
            registry.dimensions.extract(
                ["instrument", "detector", "physical_filter", "calibration_label"]
            ),
            "ImageU"
        )
        registry.registerDatasetType(flat)
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_i"),
        )
        # One exposure per visit: exposure 100 on the night of Dec 15,
        # exposure 101 on the night of Dec 16.
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", visit=10, physical_filter="dummy_i",
                 datetime_begin=datetime(2005, 12, 15, 2), datetime_end=datetime(2005, 12, 15, 3)),
            dict(instrument="DummyCam", id=101, name="101", visit=11, physical_filter="dummy_i",
                 datetime_begin=datetime(2005, 12, 16, 2), datetime_end=datetime(2005, 12, 16, 3)),
        )
        # Validity windows: each single-night label brackets one exposure;
        # "both_nights" spans both exposures' time ranges.
        registry.insertDimensionData(
            "calibration_label",
            dict(instrument="DummyCam", name="first_night",
                 datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 15, 4)),
            dict(instrument="DummyCam", name="second_night",
                 datetime_begin=datetime(2005, 12, 16, 1), datetime_end=datetime(2005, 12, 16, 4)),
            dict(instrument="DummyCam", name="both_nights",
                 datetime_begin=datetime(2005, 12, 15, 1), datetime_end=datetime(2005, 12, 16, 4)),
        )
        # Different flats for different nights for detectors 1-3 in first
        # collection.
        run1 = "calibs1"
        registry.registerRun(run1)
        for detector in (1, 2, 3):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
        # The same flat for both nights for detectors 3-5 (so detector 3 has
        # multiple valid flats) in second collection.
        run2 = "calib2"
        registry.registerRun(run2)
        for detector in (3, 4, 5):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run2)
        # Perform queries for individual exposure+detector combinations, which
        # should always return exactly one flat.
        for exposure in (100, 101):
            # run1 alone: one per-night flat matches each exposure.
            for detector in (1, 2, 3):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # run2 alone: the single "both_nights" flat matches each exposure.
            for detector in (3, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Both runs: detectors in only one run still get exactly one flat.
            for detector in (1, 2, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Detector 3 is in both runs, so two distinct flats are valid.
            for detector in (3,):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 2)
897 def testAbstractFilterQuery(self):
898 """Test that we can run a query that just lists the known
899 abstract_filters. This is tricky because abstract_filter is
900 backed by a query against physical_filter.
901 """
902 registry = self.makeRegistry()
903 registry.insertDimensionData("instrument", dict(name="DummyCam"))
904 registry.insertDimensionData(
905 "physical_filter",
906 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
907 dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
908 dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
909 )
910 rows = list(registry.queryDimensions(["abstract_filter"]))
911 self.assertCountEqual(
912 rows,
913 [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions),
914 DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)]
915 )