Coverage for python/lsst/daf/butler/registry/tests/_registry.py : 6%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
import itertools
import os
import re
import unittest
from typing import Optional

import astropy.time
import sqlalchemy

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
)
from .._registry import (
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from ..wildcards import DatasetTypeRestriction
from ..interfaces import MissingCollectionError, ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create a RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
        """
        raise NotImplementedError()
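
    # A minimal concrete subclass might look like the sketch below.  This is
    # illustrative only: the in-memory SQLite URI, the ``data`` directory
    # location, and the use of ``Registry.fromConfig`` are assumptions, not
    # part of this module.
    #
    #     class SQLiteRegistryTests(RegistryTests, unittest.TestCase):
    #
    #         @classmethod
    #         def getDataDir(cls) -> str:
    #             # Hypothetical location of the YAML test data files.
    #             return os.path.join(os.path.dirname(__file__), "data")
    #
    #         def makeRegistry(self) -> Registry:
    #             config = self.makeRegistryConfig()
    #             config["db"] = "sqlite://"  # private in-memory database
    #             return Registry.fromConfig(config, create=True)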

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def assertRowCount(self, registry: Registry, table: str, count: int):
        """Check the number of rows in a table.
        """
        # TODO: all tests that rely on this method should be rewritten, as it
        # needs to depend on Registry implementation details to have any
        # chance of working.
        sql = sqlalchemy.sql.select(
            [sqlalchemy.sql.func.count()]
        ).select_from(
            getattr(registry._tables, table)
        )
        self.assertEqual(registry._db.query(sql).scalar(), count)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # abstract_filter doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("abstract_filter", {"abstract_filter": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "abstract_filter": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding the required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "abstract_filter": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "name": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("permabias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("permabias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"permabias", "permaflat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"permabias", "permaflat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"permabias", "permaflat", "permabias.wcs", "permaflat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"permabias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(".+bias.*"))).names
        )
        self.assertEqual(
            {"permabias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(".+bias.*"), components=False)).names
        )
        self.assertLess(
            {"permabias", "permabias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(".+bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"permabias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r".+bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r".+bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"permabias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r".+bias\.wcs"), components=True)).names
        )

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("permabias")
        childType = registry.getDatasetType("permabias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("permabias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["permabias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "permabias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        datasetType = "permabias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED)
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches:
        # 1. 'tag1'
        # 2. 'run1', but only for the permaflat dataset
        # 3. 'run2'
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained
        # collection only if we don't ask to flatten it (i.e. yield only its
        # children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, (run1, "permaflat"), run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [(tag1, DatasetTypeRestriction.any),
             (run1, DatasetTypeRestriction.fromExpression("permaflat")),
             (run2, DatasetTypeRestriction.any)]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for permabias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. not look in run1, because that element of the chain is
        #    restricted to permaflat
        # 3. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Look in the chain for a permaflat that is in run1; should get the
        # same ref as if we'd searched run1 directly.
        dataId3 = {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}
        self.assertEqual(registry.findDataset("permaflat", dataId3, collections=chain1),
                         registry.findDataset("permaflat", dataId3, collections=run1))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [(run2, "permabias"), chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # A search for permabias with dataId1 should find it via tag1 in
        # chain2, recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # A search for permabias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a permaflat that is in run2.  That should not be found
        # at the front of chain2, because of the restriction to permabias
        # on run2 there, but it should be found at the end of chain1.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("permaflat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("permaflat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, and test that it's gone by asking for its
        # type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled
        back if an exception propagates out of an inner transaction block
        and is then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test.
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, name="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, name="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, name="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, name="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, name="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # collections and dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # Exposures 100 and 101 appear in both run1 and tagged2;
                # 100 has different datasets in the two collections, while
                # 101 has the same dataset in both.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that a single dimension string works as well as a list of str.
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)  # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input collections
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)  # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to a single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1").toSet()
        self.assertEqual(len(rows), 2*2)  # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000").toSet()
        self.assertEqual(len(rows), 0)

        # Select by physical_filter; it is not in the requested dimensions,
        # but it is part of the full expression, so this should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'").toSet()
        self.assertEqual(len(rows), 2*3)  # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # Need a bunch of dimensions and datasets for the test; we want
        # "abstract_filter" in the test, so we also have to add
        # physical_filter dimensions.
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "abstract_filter")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "abstract_filter")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, abstract_filter=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)  # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "abstract_filter"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)").toSet()
        self.assertEqual(len(rows), 2*2*2)  # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i", "r"))

        # limit to a single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="abstract_filter = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)  # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["abstract_filter"] for dataId in rows), ("i",))

        # The expression excludes everything; specifying a non-existing
        # skymap is not a fatal error, just an operator error.
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialMatch(self):
        """Test involving a spatial match using join tables.

        Note that a realistic test needs reasonably-defined skypix and
        regions in the registry tables, which is hard to implement in this
        simple test.  So we do not actually fill the registry with any data,
        and all queries will return an empty result, but this is still useful
        for coverage of the code that generates the query.
        """
        registry = self.makeRegistry()

        # dataset types
        collection = "test"
        registry.registerRun(name=collection)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)

        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        coaddType = DatasetType(name="deepCoadd_calexp",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                        "abstract_filter")),
                                storageClass=storageClass)
        registry.registerDatasetType(coaddType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | coaddType.dimensions.required)
        )

        # Without data this should run OK but return an empty set.
        rows = registry.queryDataIds(dimensions, datasets=calexpType, collections=collection).toSet()
        self.assertEqual(len(rows), 0)

    def testCalibrationLabelIndirection(self):
        """Test that we can look up datasets with calibration_label dimensions
        from a data ID with exposure dimensions.
        """

        def _dt(iso_string):
            return astropy.time.Time(iso_string, format="iso", scale="tai")

        registry = self.makeRegistry()

        flat = DatasetType(
            "flat",
            registry.dimensions.extract(
                ["instrument", "detector", "physical_filter", "calibration_label"]
            ),
            "ImageU"
        )
        registry.registerDatasetType(flat)
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in (1, 2, 3, 4, 5)]
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, name="100", physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-15 02:00:00"), datetime_end=_dt("2005-12-15 03:00:00")),
            dict(instrument="DummyCam", id=101, name="101", physical_filter="dummy_i",
                 datetime_begin=_dt("2005-12-16 02:00:00"), datetime_end=_dt("2005-12-16 03:00:00")),
        )
        registry.insertDimensionData(
            "calibration_label",
            dict(instrument="DummyCam", name="first_night",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-15 04:00:00")),
            dict(instrument="DummyCam", name="second_night",
                 datetime_begin=_dt("2005-12-16 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
            dict(instrument="DummyCam", name="both_nights",
                 datetime_begin=_dt("2005-12-15 01:00:00"), datetime_end=_dt("2005-12-16 04:00:00")),
        )
        # Different flats for different nights for detectors 1-3 in first
        # collection.
        run1 = "calibs1"
        registry.registerRun(run1)
        for detector in (1, 2, 3):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="first_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="second_night",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run1)
        # The same flat for both nights for detectors 3-5 (so detector 3 has
        # multiple valid flats) in second collection.
        run2 = "calib2"
        registry.registerRun(run2)
        for detector in (3, 4, 5):
            registry.insertDatasets(flat, [dict(instrument="DummyCam", calibration_label="both_nights",
                                                physical_filter="dummy_i", detector=detector)],
                                    run=run2)
        # Perform queries for individual exposure+detector combinations, which
        # should always return exactly one flat.
        for exposure in (100, 101):
            for detector in (1, 2, 3):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = list(registry.queryDatasets("flat", collections=[run1],
                                                       instrument="DummyCam",
                                                       exposure=exposure,
                                                       detector=detector))
                    self.assertEqual(len(rows), 1)
            for detector in (3, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            for detector in (1, 2, 4, 5):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 1)
            for detector in (3,):
                with self.subTest(exposure=exposure, detector=detector):
                    rows = registry.queryDatasets("flat", collections=[run1, run2],
                                                  instrument="DummyCam",
                                                  exposure=exposure,
                                                  detector=detector)
                    self.assertEqual(len(list(rows)), 2)

    def testAbstractFilterQuery(self):
        """Test that we can run a query that just lists the known
        abstract_filters.  This is tricky because abstract_filter is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", abstract_filter="i"),
            dict(instrument="DummyCam", name="dummy_i2", abstract_filter="i"),
            dict(instrument="DummyCam", name="dummy_r", abstract_filter="r"),
        )
        rows = registry.queryDataIds(["abstract_filter"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(abstract_filter="i", universe=registry.dimensions),
             DataCoordinate.standardize(abstract_filter="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of the attribute manager.
        """
        # Number of attributes with schema versions in a fresh database:
        # 6 managers with 3 records per manager.
        VERSION_COUNT = 6 * 3
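        # (Exactly which three records each manager stores is an
        # implementation detail of the attributes manager; the count above
        # is simply what a fresh repository is assumed to contain.)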

        registry = self.makeRegistry()
        attributes = registry._attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store a bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the deduplicate option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("permabias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("permabias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("permabias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("permabias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("permabias", collections=["imported_g", "imported_r"],
                                        deduplicate=True)),
            [
                registry.findDataset("permabias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("permabias", collections=["imported_r", "imported_g"],
                                        deduplicate=True)),
            [
                registry.findDataset("permabias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("permabias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("permabias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("permabias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
1155 def testQueryResults(self):
1156 """Test querying for data IDs and then manipulating the QueryResults
1157 object returned to perform other queries.
1158 """
1159 registry = self.makeRegistry()
1160 self.loadData(registry, "base.yaml")
1161 self.loadData(registry, "datasets.yaml")
1162 bias = registry.getDatasetType("permabias")
1163 flat = registry.getDatasetType("permaflat")
1164 # Obtain expected results from methods other than those we're testing
1165 # here. That includes:
1166 # - the dimensions of the data IDs we want to query:
1167 expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
1168 # - the dimensions of some other data IDs we'll extract from that:
1169 expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
1170 # - the data IDs we expect to obtain from the first queries:
1171 expectedDataIds = DataCoordinateSet(
1172 {
1173 DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
1174 universe=registry.dimensions)
1175 for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
1176 },
1177 graph=expectedGraph,
1178 hasFull=False,
1179 hasRecords=False,
1180 )
1181 # - the flat datasets we expect to find from those data IDs, in just
1182 # one collection (so deduplication is irrelevant):
1183 expectedFlats = [
1184 registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
1185 collections="imported_r"),
1186 registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
1187 collections="imported_r"),
1188 registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
1189 collections="imported_r"),
1190 ]
1191 # - the data IDs we expect to extract from that:
1192 expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
1193 # - the bias datasets we expect to find from those data IDs, after we
1194 # subset-out the physical_filter dimension, both with duplicates:
1195 expectedAllBiases = [
1196 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1197 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
1198 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
1199 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1200 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1201 ]
1202 # - ...and without duplicates:
1203 expectedDeduplicatedBiases = [
1204 registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
1205 registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
1206 registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
1207 ]
1208 # Test against those expected results, using a "lazy" query for the
1209 # data IDs (which re-executes that query each time we use it to do
1210 # something new).
1211 dataIds = registry.queryDataIds(
1212 ["detector", "physical_filter"],
1213 where="detector.purpose = 'SCIENCE'", # this rejects detector=4
1214 )
1215 self.assertEqual(dataIds.graph, expectedGraph)
1216 self.assertEqual(dataIds.toSet(), expectedDataIds)
1217 self.assertCountEqual(
1218 list(
1219 dataIds.findDatasets(
1220 flat,
1221 collections=["imported_r"],
1222 )
1223 ),
1224 expectedFlats,
1225 )
1226 subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
1227 self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
1228 self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
1229 self.assertCountEqual(
1230 list(
1231 subsetDataIds.findDatasets(
1232 bias,
1233 collections=["imported_r", "imported_g"],
1234 deduplicate=False
1235 )
1236 ),
1237 expectedAllBiases
1238 )
1239 self.assertCountEqual(
1240 list(
1241 subsetDataIds.findDatasets(
1242 bias,
1243 collections=["imported_r", "imported_g"],
1244 deduplicate=True
1245 )
1246 ), expectedDeduplicatedBiases
1247 )
1248 # Materialize the bias dataset queries (only) by putting the results
1249 # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        deduplicate=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        deduplicate=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        deduplicate=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        deduplicate=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            deduplicate=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            deduplicate=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        deduplicate=False,
                    )
                ),
                expectedAllBiases,
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        deduplicate=True,
                    )
                ),
                expectedDeduplicatedBiases,
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            deduplicate=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            deduplicate=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            deduplicate=False,
                        )
                    ),
                    expectedAllBiases,
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            deduplicate=True,
                        )
                    ),
                    expectedDeduplicatedBiases,
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                deduplicate=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                deduplicate=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
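
    def _lazyVersusMaterializedSketch(self, registry):
        """Editorial sketch, not collected by the test runner: a lazy data ID
        query and its materialized counterpart should expose the same rows,
        whatever the execution strategy.

        Assumes ``registry`` has been populated the way ``testQueryResults``
        populates it above.
        """
        lazy = registry.queryDataIds(["detector", "physical_filter"])
        with lazy.materialize() as materialized:
            # Same rows either way; only where the SQL reads from differs.
            self.assertEqual(lazy.toSet(), materialized.toSet())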

    def testEmptyDimensionsQueries(self):
        """Test Query and QueryResults objects in the case where there are no
        dimensions.
        """
        # Set up test data: one dataset type, two runs, and one dataset in
        # each run.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
        registry.registerDatasetType(schema)
        dataId = DataCoordinate.makeEmpty(registry.dimensions)
        run1 = "run1"
        run2 = "run2"
        registry.registerRun(run1)
        registry.registerRun(run2)
        (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
        (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both of the datasets, and then for each one,
        # one collection order at a time.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], deduplicate=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], deduplicate=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], deduplicate=True)),
            [dataset2],
        )
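        # Editorial aside: with deduplicate=True the collection list acts as
        # a search path, so reversing [run1, run2] to [run2, run1] flips
        # which run's dataset is found, exactly as asserted above.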
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty),
        )
        # Use the queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], deduplicate=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], deduplicate=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that query to get the
        # empty one, and repeat the tests above starting from that.
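        # Editorial aside: subsetting to the empty dimension graph maps every
        # row to the single empty data ID, and unique=True collapses those
        # duplicates, so exactly one data ID should survive:
        #
        #     subset = registry.queryDataIds(["instrument"]).subset(
        #         registry.dimensions.empty, unique=True
        #     )
        #     assert len(list(subset)) == 1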
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty),
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], deduplicate=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], deduplicate=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize that query, then
        # subset it to get the empty one, and repeat the tests once more.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty),
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], deduplicate=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty),
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], deduplicate=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], deduplicate=True)),
                    [dataset2],
                )