Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 6%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = ["RegistryTests"]
25from abc import ABC, abstractmethod
26import os
28import astropy.time
29import sqlalchemy
30from typing import Optional
32from ...core import (
33 DataCoordinate,
34 DatasetType,
35 DimensionGraph,
36 StorageClass,
37 ddl,
38 YamlRepoImportBackend
39)
40from .._registry import (
41 CollectionType,
42 ConflictingDefinitionError,
43 ConsistentDataIds,
44 Registry,
45 RegistryConfig,
46)
47from ..wildcards import DatasetTypeRestriction
48from ..interfaces import MissingCollectionError
class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.

    Concrete subclasses must implement `getDataDir` and `makeRegistry`;
    they may also override `collectionsManager` to exercise a non-default
    collections manager implementation.
    """

    # Name of the collections manager class; if a subclass provides a value
    # for this member it overrides the name specified in the default
    # configuration (`str` or `None`).
    collectionsManager: Optional[str] = None
    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.

        Returns
        -------
        path : `str`
            Path to the directory holding the YAML import/export files
            consumed by `loadData`.
        """
        raise NotImplementedError()
69 def makeRegistryConfig(self) -> RegistryConfig:
70 """Create RegistryConfig used to create a registry.
72 This method should be called by a subclass from `makeRegistry`.
73 Returned instance will be pre-configured based on the values of class
74 members, and default-configured for all other parametrs. Subclasses
75 that need default configuration should just instantiate
76 `RegistryConfig` directly.
77 """
78 config = RegistryConfig()
79 if self.collectionsManager:
80 config["managers"]["collections"] = self.collectionsManager
81 return config
    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.

        Returns
        -------
        registry : `Registry`
            A fresh, empty registry for a single test method to populate.
        """
        raise NotImplementedError()
89 def loadData(self, registry: Registry, filename: str):
90 """Load registry test data from ``getDataDir/<filename>``,
91 which should be a YAML import/export file.
92 """
93 with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
94 backend = YamlRepoImportBackend(stream, registry)
95 backend.register()
96 backend.load(datastore=None)
    def assertRowCount(self, registry: Registry, table: str, count: int):
        """Check the number of rows in table.

        Parameters
        ----------
        registry : `Registry`
            Registry whose backing database is inspected directly.
        table : `str`
            Attribute name of the table on ``registry._tables``.
        count : `int`
            Expected number of rows.
        """
        # TODO: all tests that rely on this method should be rewritten, as it
        # needs to depend on Registry implementation details to have any chance
        # of working.
        # NOTE(review): this uses the legacy SQLAlchemy ``select([...])``
        # calling convention (removed in SQLAlchemy 2.0) — confirm the pinned
        # SQLAlchemy version before modernizing.
        sql = sqlalchemy.sql.select(
            [sqlalchemy.sql.func.count()]
        ).select_from(
            getattr(registry._tables, table)
        )
        self.assertEqual(registry._db.query(sql).scalar(), count)
111 def testOpaque(self):
112 """Tests for `Registry.registerOpaqueTable`,
113 `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
114 `Registry.deleteOpaqueData`.
115 """
116 registry = self.makeRegistry()
117 table = "opaque_table_for_testing"
118 registry.registerOpaqueTable(
119 table,
120 spec=ddl.TableSpec(
121 fields=[
122 ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
123 ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
124 ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
125 ],
126 )
127 )
128 rows = [
129 {"id": 1, "name": "one", "count": None},
130 {"id": 2, "name": "two", "count": 5},
131 {"id": 3, "name": "three", "count": 6},
132 ]
133 registry.insertOpaqueData(table, *rows)
134 self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
135 self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
136 self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
137 self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
138 registry.deleteOpaqueData(table, id=3)
139 self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
140 registry.deleteOpaqueData(table)
141 self.assertEqual([], list(registry.fetchOpaqueData(table)))
143 def testDatasetType(self):
144 """Tests for `Registry.registerDatasetType` and
145 `Registry.getDatasetType`.
146 """
147 registry = self.makeRegistry()
148 # Check valid insert
149 datasetTypeName = "test"
150 storageClass = StorageClass("testDatasetType")
151 registry.storageClasses.registerStorageClass(storageClass)
152 dimensions = registry.dimensions.extract(("instrument", "visit"))
153 differentDimensions = registry.dimensions.extract(("instrument", "patch"))
154 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
155 # Inserting for the first time should return True
156 self.assertTrue(registry.registerDatasetType(inDatasetType))
157 outDatasetType1 = registry.getDatasetType(datasetTypeName)
158 self.assertEqual(outDatasetType1, inDatasetType)
160 # Re-inserting should work
161 self.assertFalse(registry.registerDatasetType(inDatasetType))
162 # Except when they are not identical
163 with self.assertRaises(ConflictingDefinitionError):
164 nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
165 registry.registerDatasetType(nonIdenticalDatasetType)
167 # Template can be None
168 datasetTypeName = "testNoneTemplate"
169 storageClass = StorageClass("testDatasetType2")
170 registry.storageClasses.registerStorageClass(storageClass)
171 dimensions = registry.dimensions.extract(("instrument", "visit"))
172 inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
173 registry.registerDatasetType(inDatasetType)
174 outDatasetType2 = registry.getDatasetType(datasetTypeName)
175 self.assertEqual(outDatasetType2, inDatasetType)
177 allTypes = set(registry.queryDatasetTypes())
178 self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})
    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # abstract_filter doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.  A no-op sync returns False.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )
    def testDataIdRelationships(self):
        """Test `Registry.relateDataIds`.

        Checks the contains/within/overlaps flags of the returned
        `ConsistentDataIds` for increasingly subtle data ID pairs, and that
        inconsistent pairs yield `None`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Simple cases where the dimension key-value pairs tell us everything.
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1"},
                {"instrument": "Cam1"},
            ),
            ConsistentDataIds(contains=True, within=True, overlaps=True)
        )
        self.assertEqual(
            registry.relateDataIds({}, {}),
            ConsistentDataIds(contains=True, within=True, overlaps=False)
        )
        self.assertEqual(
            registry.relateDataIds({"instrument": "Cam1"}, {}),
            ConsistentDataIds(contains=True, within=False, overlaps=False)
        )
        self.assertEqual(
            registry.relateDataIds({}, {"instrument": "Cam1"}),
            ConsistentDataIds(contains=False, within=True, overlaps=False)
        )
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1"},
            ),
            ConsistentDataIds(contains=True, within=False, overlaps=True)
        )
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            ConsistentDataIds(contains=False, within=True, overlaps=True)
        )
        # Two different physical filters are inconsistent -> None.
        self.assertIsNone(
            registry.relateDataIds(
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "physical_filter": "Cam1-R1"},
            )
        )
        # Trickier cases where we need to expand data IDs, but it's still just
        # required and implied dimension relationships.
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
                {"instrument": "Cam1", "abstract_filter": "g"},
            ),
            ConsistentDataIds(contains=True, within=False, overlaps=True)
        )
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1", "abstract_filter": "g"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            ConsistentDataIds(contains=False, within=True, overlaps=True)
        )
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1"},
                {"htm7": 131073},
            ),
            ConsistentDataIds(contains=False, within=False, overlaps=False)
        )
        # Trickiest cases involve spatial or temporal overlaps or non-dimension
        # elements that relate things (of which visit_definition is our only
        # current example).
        #
        # These two HTM IDs at different levels have a "contains" relationship
        # spatially, but there is no overlap of dimension keys.  The exact
        # result of relateDataIds is unspecified for this case, but it's
        # guaranteed to be truthy (see relateDataIds docs.).
        self.assertTrue(
            registry.relateDataIds({"htm7": 131073}, {"htm9": 2097169})
        )
        # These two HTM IDs at different levels are disjoint spatially, which
        # means the data IDs are inconsistent.
        self.assertIsNone(
            registry.relateDataIds({"htm7": 131073}, {"htm9": 2097391})
        )
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "name": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        # visit 1 is defined to include exposure 1, so they overlap...
        self.assertEqual(
            registry.relateDataIds(
                {"instrument": "Cam1", "visit": 1},
                {"instrument": "Cam1", "exposure": 1},
            ),
            ConsistentDataIds(contains=False, within=False, overlaps=True)
        )
        # ...but exposure 2 is not part of visit 1, so that pair is
        # inconsistent.
        self.assertIsNone(
            registry.relateDataIds(
                {"instrument": "Cam1", "visit": 1},
                {"instrument": "Cam1", "exposure": 2},
            )
        )
360 def testDataset(self):
361 """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
362 and `Registry.removeDatasets`.
363 """
364 registry = self.makeRegistry()
365 self.loadData(registry, "base.yaml")
366 run = "test"
367 registry.registerRun(run)
368 datasetType = registry.getDatasetType("permabias")
369 dataId = {"instrument": "Cam1", "detector": 2}
370 ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
371 outRef = registry.getDataset(ref.id)
372 self.assertIsNotNone(ref.id)
373 self.assertEqual(ref, outRef)
374 with self.assertRaises(ConflictingDefinitionError):
375 registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
376 registry.removeDatasets([ref])
377 self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))
379 def testComponents(self):
380 """Tests for `Registry.attachComponents` and other dataset operations
381 on composite datasets.
382 """
383 registry = self.makeRegistry()
384 self.loadData(registry, "base.yaml")
385 run = "test"
386 registry.registerRun(run)
387 parentDatasetType = registry.getDatasetType("permabias")
388 childDatasetType1 = registry.getDatasetType("permabias.image")
389 childDatasetType2 = registry.getDatasetType("permabias.mask")
390 dataId = {"instrument": "Cam1", "detector": 2}
391 parent, = registry.insertDatasets(parentDatasetType, dataIds=[dataId], run=run)
392 children = {"image": registry.insertDatasets(childDatasetType1, dataIds=[dataId], run=run)[0],
393 "mask": registry.insertDatasets(childDatasetType2, dataIds=[dataId], run=run)[0]}
394 parent = registry.attachComponents(parent, children)
395 self.assertEqual(parent.components, children)
396 outParent = registry.getDataset(parent.id)
397 self.assertEqual(outParent.components, children)
398 # Remove the parent; this should remove all children.
399 registry.removeDatasets([parent])
400 self.assertIsNone(registry.findDataset(parentDatasetType, dataId, collections=[run]))
401 self.assertIsNone(registry.findDataset(childDatasetType1, dataId, collections=[run]))
402 self.assertIsNone(registry.findDataset(childDatasetType2, dataId, collections=[run]))
404 def testFindDataset(self):
405 """Tests for `Registry.findDataset`.
406 """
407 registry = self.makeRegistry()
408 self.loadData(registry, "base.yaml")
409 run = "test"
410 datasetType = registry.getDatasetType("permabias")
411 dataId = {"instrument": "Cam1", "detector": 4}
412 registry.registerRun(run)
413 inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
414 outputRef = registry.findDataset(datasetType, dataId, collections=[run])
415 self.assertEqual(outputRef, inputRef)
416 # Check that retrieval with invalid dataId raises
417 with self.assertRaises(LookupError):
418 dataId = {"instrument": "Cam1"} # no detector
419 registry.findDataset(datasetType, dataId, collections=run)
420 # Check that different dataIds match to different datasets
421 dataId1 = {"instrument": "Cam1", "detector": 1}
422 inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
423 dataId2 = {"instrument": "Cam1", "detector": 2}
424 inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
425 self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
426 self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
427 self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
428 self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
429 # Check that requesting a non-existing dataId returns None
430 nonExistingDataId = {"instrument": "Cam1", "detector": 3}
431 self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))
    def testCollections(self):
        """Tests for registry methods that manage collections.

        Exercises RUN, TAGGED, and CHAINED collections: associate/disassociate,
        dataset-type restrictions in chains, recursive chain search, and
        collection removal ordering constraints.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "permabias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches:
        # 1. 'tag1'
        # 2. 'run1', but only for the permaflat dataset
        # 3. 'run2'
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [(tag1, DatasetTypeRestriction.any),
             (run1, DatasetTypeRestriction.fromExpression("permaflat")),
             (run2, DatasetTypeRestriction.any)]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for permabias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. not look in run1, because that link is restricted to permaflat
        # 3. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Look in the chain for a permaflat that is in run1; should get the
        # same ref as if we'd searched run1 directly.
        dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
        self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                         registry.findDataset("permaflat", dataId3, collections=run1),)
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
        # Search for permabias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for permabias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a permaflat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to permabias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)
573 def testBasicTransaction(self):
574 """Test that all operations within a single transaction block are
575 rolled back if an exception propagates out of the block.
576 """
577 registry = self.makeRegistry()
578 storageClass = StorageClass("testDatasetType")
579 registry.storageClasses.registerStorageClass(storageClass)
580 with registry.transaction():
581 registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
582 with self.assertRaises(ValueError):
583 with registry.transaction():
584 registry.insertDimensionData("instrument", {"name": "Cam2"})
585 raise ValueError("Oops, something went wrong")
586 # Cam1 should exist
587 self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
588 # But Cam2 and Cam3 should both not exist
589 with self.assertRaises(LookupError):
590 registry.expandDataId(instrument="Cam2")
591 with self.assertRaises(LookupError):
592 registry.expandDataId(instrument="Cam3")
594 def testNestedTransaction(self):
595 """Test that operations within a transaction block are not rolled back
596 if an exception propagates out of an inner transaction block and is
597 then caught.
598 """
599 registry = self.makeRegistry()
600 dimension = registry.dimensions["instrument"]
601 dataId1 = {"instrument": "DummyCam"}
602 dataId2 = {"instrument": "DummyCam2"}
603 checkpointReached = False
604 with registry.transaction():
605 # This should be added and (ultimately) committed.
606 registry.insertDimensionData(dimension, dataId1)
607 with self.assertRaises(sqlalchemy.exc.IntegrityError):
608 with registry.transaction():
609 # This does not conflict, and should succeed (but not
610 # be committed).
611 registry.insertDimensionData(dimension, dataId2)
612 checkpointReached = True
613 # This should conflict and raise, triggerring a rollback
614 # of the previous insertion within the same transaction
615 # context, but not the original insertion in the outer
616 # block.
617 registry.insertDimensionData(dimension, dataId1)
618 self.assertTrue(checkpointReached)
619 self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
620 with self.assertRaises(LookupError):
621 registry.expandDataId(dataId2, graph=dimension.graph)
623 def testInstrumentDimensions(self):
624 """Test queries involving only instrument dimensions, with no joins to
625 skymap."""
626 registry = self.makeRegistry()
628 # need a bunch of dimensions and datasets for test
629 registry.insertDimensionData(
630 "instrument",
631 dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
632 )
633 registry.insertDimensionData(
634 "physical_filter",
635 dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
636 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
637 )
638 registry.insertDimensionData(
639 "detector",
640 *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
641 )
642 registry.insertDimensionData(
643 "visit_system",
644 dict(instrument="DummyCam", id=1, name="default"),
645 )
646 registry.insertDimensionData(
647 "visit",
648 dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
649 dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
650 dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
651 )
652 registry.insertDimensionData(
653 "exposure",
654 dict(instrument="DummyCam", id=100, name="100", physical_filter="dummy_i"),
655 dict(instrument="DummyCam", id=101, name="101", physical_filter="dummy_i"),
656 dict(instrument="DummyCam", id=110, name="110", physical_filter="dummy_r"),
657 dict(instrument="DummyCam", id=111, name="111", physical_filter="dummy_r"),
658 dict(instrument="DummyCam", id=200, name="200", physical_filter="dummy_r"),
659 dict(instrument="DummyCam", id=201, name="201", physical_filter="dummy_r"),
660 )
661 registry.insertDimensionData(
662 "visit_definition",
663 dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
664 dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
665 dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
666 dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
667 dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
668 dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
669 )
670 # dataset types
671 run1 = "test1_r"
672 run2 = "test2_r"
673 tagged2 = "test2_t"
674 registry.registerRun(run1)
675 registry.registerRun(run2)
676 registry.registerCollection(tagged2)
677 storageClass = StorageClass("testDataset")
678 registry.storageClasses.registerStorageClass(storageClass)
679 rawType = DatasetType(name="RAW",
680 dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
681 storageClass=storageClass)
682 registry.registerDatasetType(rawType)
683 calexpType = DatasetType(name="CALEXP",
684 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
685 storageClass=storageClass)
686 registry.registerDatasetType(calexpType)
688 # add pre-existing datasets
689 for exposure in (100, 101, 110, 111):
690 for detector in (1, 2, 3):
691 # note that only 3 of 5 detectors have datasets
692 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
693 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
694 # exposures 100 and 101 appear in both run1 and tagged2.
695 # 100 has different datasets in the different collections
696 # 101 has the same dataset in both collections.
697 if exposure == 100:
698 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
699 if exposure in (100, 101):
700 registry.associate(tagged2, [ref])
701 # Add pre-existing datasets to tagged2.
702 for exposure in (200, 201):
703 for detector in (3, 4, 5):
704 # note that only 3 of 5 detectors have datasets
705 dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
706 ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
707 registry.associate(tagged2, [ref])
709 dimensions = DimensionGraph(
710 registry.dimensions,
711 dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
712 )
713 # Test that single dim string works as well as list of str
714 rows = list(registry.queryDimensions("visit", datasets=rawType, collections=run1, expand=True))
715 rowsI = list(registry.queryDimensions(["visit"], datasets=rawType, collections=run1, expand=True))
716 self.assertEqual(rows, rowsI)
717 # with empty expression
718 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1, expand=True))
719 self.assertEqual(len(rows), 4*3) # 4 exposures times 3 detectors
720 for dataId in rows:
721 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
722 packer1 = registry.dimensions.makePacker("visit_detector", dataId)
723 packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
724 self.assertEqual(packer1.unpack(packer1.pack(dataId)),
725 DataCoordinate.standardize(dataId, graph=packer1.dimensions))
726 self.assertEqual(packer2.unpack(packer2.pack(dataId)),
727 DataCoordinate.standardize(dataId, graph=packer2.dimensions))
728 self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
729 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
730 (100, 101, 110, 111))
731 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
732 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
734 # second collection
735 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=tagged2))
736 self.assertEqual(len(rows), 4*3) # 4 exposures times 3 detectors
737 for dataId in rows:
738 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
739 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
740 (100, 101, 200, 201))
741 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
742 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
744 # with two input datasets
745 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=[run1, tagged2]))
746 self.assertEqual(len(set(rows)), 6*3) # 6 exposures times 3 detectors; set needed to de-dupe
747 for dataId in rows:
748 self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
749 self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
750 (100, 101, 110, 111, 200, 201))
751 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
752 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))
754 # limit to single visit
755 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
756 where="visit = 10"))
757 self.assertEqual(len(rows), 2*3) # 2 exposures times 3 detectors
758 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
759 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
760 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
762 # more limiting expression, using link names instead of Table.column
763 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
764 where="visit = 10 and detector > 1"))
765 self.assertEqual(len(rows), 2*2) # 2 exposures times 2 detectors
766 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
767 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
768 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))
770 # expression excludes everything
771 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
772 where="visit > 1000"))
773 self.assertEqual(len(rows), 0)
775 # Selecting by physical_filter, this is not in the dimensions, but it
776 # is a part of the full expression so it should work too.
777 rows = list(registry.queryDimensions(dimensions, datasets=rawType, collections=run1,
778 where="physical_filter = 'dummy_r'"))
779 self.assertEqual(len(rows), 2*3) # 2 exposures times 3 detectors
780 self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
781 self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
782 self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))
    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "abstract_filter" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        # 10 tracts with 10 patches each; only a subset gets datasets below.
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets: tracts 1, 3, 5 x patches 2, 4, 6, 7;
        # one merge dataset per patch, one calexp per (patch, filter).
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run))
        self.assertEqual(len(rows), 3*4*2) # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="tract IN (1, 5) AND patch IN (2, 7)"))
        self.assertEqual(len(rows), 2*2*2) # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="abstract_filter = 'i'"))
        self.assertEqual(len(rows), 3*4*1) # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

        # expression excludes everything, specifying non-existing skymap is
        # not a fatal error, it's operator error
        rows = list(registry.queryDimensions(dimensions,
                                             datasets=[calexpType, mergeType], collections=run,
                                             where="skymap = 'Mars'"))
        self.assertEqual(len(rows), 0)
882 def testSpatialMatch(self):
883 """Test involving spatial match using join tables.
885 Note that realistic test needs a reasonably-defined skypix and regions
886 in registry tables which is hard to implement in this simple test.
887 So we do not actually fill registry with any data and all queries will
888 return empty result, but this is still useful for coverage of the code
889 that generates query.
890 """
891 registry = self.makeRegistry()
893 # dataset types
894 collection = "test"
895 registry.registerRun(name=collection)
896 storageClass = StorageClass("testDataset")
897 registry.storageClasses.registerStorageClass(storageClass)
899 calexpType = DatasetType(name="CALEXP",
900 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
901 storageClass=storageClass)
902 registry.registerDatasetType(calexpType)
904 coaddType = DatasetType(name="deepCoadd_calexp",
905 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
906 "abstract_filter")),
907 storageClass=storageClass)
908 registry.registerDatasetType(coaddType)
910 dimensions = DimensionGraph(
911 registry.dimensions,
912 dimensions=(calexpType.dimensions.required | coaddType.dimensions.required)
913 )
915 # without data this should run OK but return empty set
916 rows = list(registry.queryDimensions(dimensions, datasets=calexpType, collections=collection))
917 self.assertEqual(len(rows), 0)
    def testCalibrationLabelIndirection(self):
        """Test that we can look up datasets with calibration_label dimensions
        from a data ID with exposure dimensions.
        """

        def _dt(iso_string):
            # Convenience: parse an ISO timestamp string as a UTC astropy Time.
            return astropy.time.Time(iso_string, format="iso", scale="utc")

        registry = self.makeRegistry()

        flat = DatasetType(
            "flat",
            registry.dimensions.extract(
                ["instrument", "detector", "physical_filter", "calibration_label"]
            ),
            "ImageU"
        )
        registry.registerDatasetType(flat)
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
        )
        # Two exposures on consecutive nights; each falls inside exactly one
        # of the single-night calibration_label validity ranges below (and
        # inside "both_nights").
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-15 02:00:00"), datetime_end=_dt("2005-12-15 03:00:00")),
            dict(instrument="DummyCam", id=101, name="101", physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-16 02:00:00"), datetime_end=_dt("2005-12-16 03:00:00")),
        )
        registry.insertDimensionData(
            "calibration_label",
            dict(instrument="DummyCam", name="first_night",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-15 04:00:00")),
            dict(instrument="DummyCam", name="second_night",
                 datetime_begin=_dt("2005-12-16 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
            dict(instrument="DummyCam", name="both_nights",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
        )
        # Different flats for different nights for detectors 1-3 in first
        # collection.
        run1 = "calibs1"
        registry.registerRun(run1)
        for detector in (1, 2, 3):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
        # The same flat for both nights for detectors 3-5 (so detector 3 has
        # multiple valid flats) in second collection.
        run2 = "calib2"
        registry.registerRun(run2)
        for detector in (3, 4, 5):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run2)
        # Perform queries for individual exposure+detector combinations, which
        # should always return exactly one flat.
        for exposure in (100, 101):
            # In run1 alone, detectors 1-3 each have exactly one flat whose
            # validity range overlaps the exposure's night.
            for detector in (1, 2, 3):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = list(registry.queryDatasets("flat", collections=[run1],
                                                       instrument="DummyCam",
                                                       exposure=exposure,
                                                       detector=detector))
                    self.assertEqual(len(rows), 1)
            # In run2 alone, detectors 3-5 each have exactly one flat
            # ("both_nights"), valid for either exposure.
            for detector in (3, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Searching both collections: detectors 1-2 only match in run1
            # and 4-5 only in run2, so still exactly one flat each.
            for detector in (1, 2, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            # Detector 3 has valid flats in both collections, so it is the
            # one case that yields two matches.
            for detector in (3,):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 2)
1013 def testAbstractFilterQuery(self):
1014 """Test that we can run a query that just lists the known
1015 abstract_filters. This is tricky because abstract_filter is
1016 backed by a query against physical_filter.
1017 """
1018 registry = self.makeRegistry()
1019 registry.insertDimensionData("instrument", dict(name="DummyCam"))
1020 registry.insertDimensionData(
1021 "physical_filter",
1022 dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
1023 dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
1024 dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
1025 )
1026 rows = list(registry.queryDimensions(["abstract_filter"]))
1027 self.assertCountEqual(
1028 rows,
1029 [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions),
1030 DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)]
1031 )