# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = ["RegistryTests"]

from abc import ABC, abstractmethod
from collections import defaultdict
import itertools
import os
import re
import unittest

import astropy.time
import sqlalchemy
from typing import Optional, Type, Union

try:
    import numpy as np
except ImportError:
    np = None

from ...core import (
    DataCoordinate,
    DataCoordinateSequence,
    DataCoordinateSet,
    DatasetAssociation,
    DatasetRef,
    DatasetType,
    DimensionGraph,
    NamedValueSet,
    StorageClass,
    ddl,
    Timespan,
)
from .._registry import (
    CollectionType,
    ConflictingDefinitionError,
    InconsistentDataIdError,
    Registry,
    RegistryConfig,
)
from ..interfaces import MissingCollectionError, ButlerAttributeExistsError


class RegistryTests(ABC):
    """Generic tests for the `Registry` class that can be subclassed to
    generate tests for different configurations.
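
    A concrete test case mixes this class into `unittest.TestCase` and
    implements the abstract members.  A minimal sketch (the in-memory SQLite
    ``db`` string and the `Registry.fromConfig` call pattern shown here are
    illustrative assumptions, not requirements)::

        class SqliteRegistryTests(RegistryTests, unittest.TestCase):

            @classmethod
            def getDataDir(cls) -> str:
                # Hypothetical location for the YAML test data files.
                return os.path.join(os.path.dirname(__file__), "data", "registry")

            def makeRegistry(self) -> Registry:
                config = self.makeRegistryConfig()
                config["db"] = "sqlite://"  # private in-memory database
                return Registry.fromConfig(config, create=True)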
    """

    collectionsManager: Optional[str] = None
    """Name of the collections manager class.  If a subclass provides a value
    for this member, it overrides the manager name specified in the default
    configuration (`str`).
    """

    @classmethod
    @abstractmethod
    def getDataDir(cls) -> str:
        """Return the root directory containing test data YAML files.
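
        For example, a subclass that ships its YAML files next to the test
        module might return (hypothetical layout)::

            os.path.join(os.path.dirname(__file__), "data", "registry")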
        """
        raise NotImplementedError()

    def makeRegistryConfig(self) -> RegistryConfig:
        """Create RegistryConfig used to create a registry.

        This method should be called by a subclass from `makeRegistry`.
        The returned instance will be pre-configured based on the values of
        class members, and default-configured for all other parameters.
        Subclasses that need default configuration should just instantiate
        `RegistryConfig` directly.
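
        For example, a subclass can exercise a non-default collections
        manager just by setting the class member (the manager name below is
        illustrative)::

            collectionsManager = (
                "lsst.daf.butler.registry.collections.synthIntKey."
                "SynthIntKeyCollectionManager"
            )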
        """
        config = RegistryConfig()
        if self.collectionsManager:
            config["managers"]["collections"] = self.collectionsManager
        return config

    @abstractmethod
    def makeRegistry(self) -> Registry:
        """Return the Registry instance to be tested.
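
        Each call should return a new, empty registry, since tests assume a
        fresh database.  A minimal in-memory sketch (the SQLAlchemy URI is an
        assumption about the backend under test)::

            config = self.makeRegistryConfig()
            config["db"] = "sqlite://"
            return Registry.fromConfig(config, create=True)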
        """
        raise NotImplementedError()

    def loadData(self, registry: Registry, filename: str):
        """Load registry test data from ``getDataDir/<filename>``,
        which should be a YAML import/export file.
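
        For example, most tests in this class begin with::

            registry = self.makeRegistry()
            self.loadData(registry, "base.yaml")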
        """
        from ...transfers import YamlRepoImportBackend
        with open(os.path.join(self.getDataDir(), filename), 'r') as stream:
            backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)

    def testOpaque(self):
        """Tests for `Registry.registerOpaqueTable`,
        `Registry.insertOpaqueData`, `Registry.fetchOpaqueData`, and
        `Registry.deleteOpaqueData`.
        """
        registry = self.makeRegistry()
        table = "opaque_table_for_testing"
        registry.registerOpaqueTable(
            table,
            spec=ddl.TableSpec(
                fields=[
                    ddl.FieldSpec("id", dtype=sqlalchemy.BigInteger, primaryKey=True),
                    ddl.FieldSpec("name", dtype=sqlalchemy.String, length=16, nullable=False),
                    ddl.FieldSpec("count", dtype=sqlalchemy.SmallInteger, nullable=True),
                ],
            )
        )
        rows = [
            {"id": 1, "name": "one", "count": None},
            {"id": 2, "name": "two", "count": 5},
            {"id": 3, "name": "three", "count": 6},
        ]
        registry.insertOpaqueData(table, *rows)
        self.assertCountEqual(rows, list(registry.fetchOpaqueData(table)))
        self.assertEqual(rows[0:1], list(registry.fetchOpaqueData(table, id=1)))
        self.assertEqual(rows[1:2], list(registry.fetchOpaqueData(table, name="two")))
        self.assertEqual([], list(registry.fetchOpaqueData(table, id=1, name="two")))
        registry.deleteOpaqueData(table, id=3)
        self.assertCountEqual(rows[:2], list(registry.fetchOpaqueData(table)))
        registry.deleteOpaqueData(table)
        self.assertEqual([], list(registry.fetchOpaqueData(table)))

    def testDatasetType(self):
        """Tests for `Registry.registerDatasetType` and
        `Registry.getDatasetType`.
        """
        registry = self.makeRegistry()
        # Check valid insert
        datasetTypeName = "test"
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        differentDimensions = registry.dimensions.extract(("instrument", "patch"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        # Inserting for the first time should return True
        self.assertTrue(registry.registerDatasetType(inDatasetType))
        outDatasetType1 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType1, inDatasetType)

        # Re-inserting should work
        self.assertFalse(registry.registerDatasetType(inDatasetType))
        # Except when they are not identical
        with self.assertRaises(ConflictingDefinitionError):
            nonIdenticalDatasetType = DatasetType(datasetTypeName, differentDimensions, storageClass)
            registry.registerDatasetType(nonIdenticalDatasetType)

        # Template can be None
        datasetTypeName = "testNoneTemplate"
        storageClass = StorageClass("testDatasetType2")
        registry.storageClasses.registerStorageClass(storageClass)
        dimensions = registry.dimensions.extract(("instrument", "visit"))
        inDatasetType = DatasetType(datasetTypeName, dimensions, storageClass)
        registry.registerDatasetType(inDatasetType)
        outDatasetType2 = registry.getDatasetType(datasetTypeName)
        self.assertEqual(outDatasetType2, inDatasetType)

        allTypes = set(registry.queryDatasetTypes())
        self.assertEqual(allTypes, {outDatasetType1, outDatasetType2})

    def testDimensions(self):
        """Tests for `Registry.insertDimensionData`,
        `Registry.syncDimensionData`, and `Registry.expandDataId`.
        """
        registry = self.makeRegistry()
        dimensionName = "instrument"
        dimension = registry.dimensions[dimensionName]
        dimensionValue = {"name": "DummyCam", "visit_max": 10, "exposure_max": 10, "detector_max": 2,
                          "class_name": "lsst.obs.base.Instrument"}
        registry.insertDimensionData(dimensionName, dimensionValue)
        # Inserting the same value twice should fail
        with self.assertRaises(sqlalchemy.exc.IntegrityError):
            registry.insertDimensionData(dimensionName, dimensionValue)
        # expandDataId should retrieve the record we just inserted
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam",
                graph=dimension.graph
            ).records[dimensionName].toDict(),
            dimensionValue
        )
        # expandDataId should raise if there is no record with the given ID.
        with self.assertRaises(LookupError):
            registry.expandDataId({"instrument": "Unknown"}, graph=dimension.graph)
        # band doesn't have a table; insert should fail.
        with self.assertRaises(TypeError):
            registry.insertDimensionData("band", {"band": "i"})
        dimensionName2 = "physical_filter"
        dimension2 = registry.dimensions[dimensionName2]
        dimensionValue2 = {"name": "DummyCam_i", "band": "i"}
        # Missing required dependency ("instrument") should fail
        with self.assertRaises(KeyError):
            registry.insertDimensionData(dimensionName2, dimensionValue2)
        # Adding required dependency should fix the failure
        dimensionValue2["instrument"] = "DummyCam"
        registry.insertDimensionData(dimensionName2, dimensionValue2)
        # expandDataId should retrieve the record we just inserted.
        self.assertEqual(
            registry.expandDataId(
                instrument="DummyCam", physical_filter="DummyCam_i",
                graph=dimension2.graph
            ).records[dimensionName2].toDict(),
            dimensionValue2
        )
        # Use syncDimensionData to insert a new record successfully.
        dimensionName3 = "detector"
        dimensionValue3 = {"instrument": "DummyCam", "id": 1, "full_name": "one",
                           "name_in_raft": "zero", "purpose": "SCIENCE"}
        self.assertTrue(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Sync that again.  Note that one field ("raft") is NULL, and that
        # should be okay.
        self.assertFalse(registry.syncDimensionData(dimensionName3, dimensionValue3))
        # Now try that sync with the same primary key but a different value.
        # This should fail.
        with self.assertRaises(ConflictingDefinitionError):
            registry.syncDimensionData(
                dimensionName3,
                {"instrument": "DummyCam", "id": 1, "full_name": "one",
                 "name_in_raft": "four", "purpose": "SCIENCE"}
            )

    @unittest.skipIf(np is None, "numpy not available.")
    def testNumpyDataId(self):
        """Test that we can use a numpy int in a dataId."""
        registry = self.makeRegistry()
        dimensionEntries = [
            ("instrument", {"instrument": "DummyCam"}),
            ("physical_filter", {"instrument": "DummyCam", "name": "d-r", "band": "R"}),
            # Using an np.int64 here fails unless Records.fromDict is also
            # patched to look for numbers.Integral
            ("visit", {"instrument": "DummyCam", "id": 42, "name": "fortytwo", "physical_filter": "d-r"}),
        ]
        for args in dimensionEntries:
            registry.insertDimensionData(*args)

        # Try a normal integer and something that looks like an int but
        # is not.
        for visit_id in (42, np.int64(42)):
            with self.subTest(visit_id=visit_id, id_type=type(visit_id).__name__):
                expanded = registry.expandDataId({"instrument": "DummyCam", "visit": visit_id})
                self.assertEqual(expanded["visit"], int(visit_id))
                self.assertIsInstance(expanded["visit"], int)

    def testDataIdRelationships(self):
        """Test that `Registry.expandDataId` raises an exception when the
        given keys are inconsistent.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        # Insert a few more dimension records for the next test.
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 1, "obs_id": "one", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "exposure",
            {"instrument": "Cam1", "id": 2, "obs_id": "two", "physical_filter": "Cam1-G"},
        )
        registry.insertDimensionData(
            "visit_system",
            {"instrument": "Cam1", "id": 0, "name": "one-to-one"},
        )
        registry.insertDimensionData(
            "visit",
            {"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
        )
        registry.insertDimensionData(
            "visit_definition",
            {"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
        )
        with self.assertRaises(InconsistentDataIdError):
            registry.expandDataId(
                {"instrument": "Cam1", "visit": 1, "exposure": 2},
            )

    def testDataset(self):
        """Basic tests for `Registry.insertDatasets`, `Registry.getDataset`,
        and `Registry.removeDatasets`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        registry.registerRun(run)
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 2}
        ref, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outRef = registry.getDataset(ref.id)
        self.assertIsNotNone(ref.id)
        self.assertEqual(ref, outRef)
        with self.assertRaises(ConflictingDefinitionError):
            registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        registry.removeDatasets([ref])
        self.assertIsNone(registry.findDataset(datasetType, dataId, collections=[run]))

    def testFindDataset(self):
        """Tests for `Registry.findDataset`.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        run = "test"
        datasetType = registry.getDatasetType("bias")
        dataId = {"instrument": "Cam1", "detector": 4}
        registry.registerRun(run)
        inputRef, = registry.insertDatasets(datasetType, dataIds=[dataId], run=run)
        outputRef = registry.findDataset(datasetType, dataId, collections=[run])
        self.assertEqual(outputRef, inputRef)
        # Check that retrieval with invalid dataId raises
        with self.assertRaises(LookupError):
            dataId = {"instrument": "Cam1"}  # no detector
            registry.findDataset(datasetType, dataId, collections=run)
        # Check that different dataIds match to different datasets
        dataId1 = {"instrument": "Cam1", "detector": 1}
        inputRef1, = registry.insertDatasets(datasetType, dataIds=[dataId1], run=run)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        inputRef2, = registry.insertDatasets(datasetType, dataIds=[dataId2], run=run)
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId1, collections=run), inputRef2)
        self.assertNotEqual(registry.findDataset(datasetType, dataId2, collections=run), inputRef1)
        # Check that requesting a non-existing dataId returns None
        nonExistingDataId = {"instrument": "Cam1", "detector": 3}
        self.assertIsNone(registry.findDataset(datasetType, nonExistingDataId, collections=run))

    def testDatasetTypeComponentQueries(self):
        """Test component options when querying for dataset types.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test querying for dataset types with different inputs.
        # First query for all dataset types; components should only be
        # included when components=True.
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes()).names
        )
        self.assertEqual(
            {"bias", "flat"},
            NamedValueSet(registry.queryDatasetTypes(components=False)).names
        )
        self.assertLess(
            {"bias", "flat", "bias.wcs", "flat.photoCalib"},
            NamedValueSet(registry.queryDatasetTypes(components=True)).names
        )
        # Use a pattern that can match either parent or components.  Again,
        # components are only returned if components=True.
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"))).names
        )
        self.assertEqual(
            {"bias"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=False)).names
        )
        self.assertLess(
            {"bias", "bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile("^bias.*"), components=True)).names
        )
        # This pattern matches only a component.  In this case we also return
        # that component dataset type if components=None.
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"))).names
        )
        self.assertEqual(
            set(),
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=False)).names
        )
        self.assertEqual(
            {"bias.wcs"},
            NamedValueSet(registry.queryDatasetTypes(re.compile(r"^bias\.wcs"), components=True)).names
        )

    def testComponentLookups(self):
        """Test searching for component datasets via their parents.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Test getting the child dataset type (which does still exist in the
        # Registry), and check for consistency with
        # DatasetRef.makeComponentRef.
        collection = "imported_g"
        parentType = registry.getDatasetType("bias")
        childType = registry.getDatasetType("bias.wcs")
        parentRefResolved = registry.findDataset(parentType, collections=collection,
                                                 instrument="Cam1", detector=1)
        self.assertIsInstance(parentRefResolved, DatasetRef)
        self.assertEqual(childType, parentRefResolved.makeComponentRef("wcs").datasetType)
        # Search for a single dataset with findDataset.
        childRef1 = registry.findDataset("bias.wcs", collections=collection,
                                         dataId=parentRefResolved.dataId)
        self.assertEqual(childRef1, parentRefResolved.makeComponentRef("wcs"))
        # Search for detector data IDs constrained by component dataset
        # existence with queryDataIds.
        dataIds = registry.queryDataIds(
            ["detector"],
            datasets=["bias.wcs"],
            collections=collection,
        ).toSet()
        self.assertEqual(
            dataIds,
            DataCoordinateSet(
                {
                    DataCoordinate.standardize(instrument="Cam1", detector=d, graph=parentType.dimensions)
                    for d in (1, 2, 3)
                },
                parentType.dimensions,
            )
        )
        # Search for multiple datasets of a single type with queryDatasets.
        childRefs2 = set(registry.queryDatasets(
            "bias.wcs",
            collections=collection,
        ))
        self.assertEqual(
            {ref.unresolved() for ref in childRefs2},
            {DatasetRef(childType, dataId) for dataId in dataIds}
        )

    def testCollections(self):
        """Tests for registry methods that manage collections.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        run1 = "imported_g"
        run2 = "imported_r"
        # Test setting a collection docstring after it has been created.
        registry.setCollectionDocumentation(run1, "doc for run1")
        self.assertEqual(registry.getCollectionDocumentation(run1), "doc for run1")
        registry.setCollectionDocumentation(run1, None)
        self.assertIsNone(registry.getCollectionDocumentation(run1))
        datasetType = "bias"
        # Find some datasets via their run's collection.
        dataId1 = {"instrument": "Cam1", "detector": 1}
        ref1 = registry.findDataset(datasetType, dataId1, collections=run1)
        self.assertIsNotNone(ref1)
        dataId2 = {"instrument": "Cam1", "detector": 2}
        ref2 = registry.findDataset(datasetType, dataId2, collections=run1)
        self.assertIsNotNone(ref2)
        # Associate those into a new collection, then look for them there.
        tag1 = "tag1"
        registry.registerCollection(tag1, type=CollectionType.TAGGED, doc="doc for tag1")
        self.assertEqual(registry.getCollectionDocumentation(tag1), "doc for tag1")
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Disassociate one and verify that we can't find it there anymore...
        registry.disassociate(tag1, [ref1])
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=tag1))
        # ...but we can still find ref2 in tag1, and ref1 in the run.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=run1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        collections = set(registry.queryCollections())
        self.assertEqual(collections, {run1, run2, tag1})
        # Associate both refs into tag1 again; ref2 is already there, but that
        # should be a harmless no-op.
        registry.associate(tag1, [ref1, ref2])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Get a different dataset (from a different run) that has the same
        # dataset type and data ID as ref2.
        ref2b = registry.findDataset(datasetType, dataId2, collections=run2)
        self.assertNotEqual(ref2, ref2b)
        # Attempting to associate that into tag1 should be an error.
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref2b])
        # That error shouldn't have messed up what we had before.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        # Attempt to associate the conflicting dataset again, this time with
        # a dataset that isn't in the collection and won't cause a conflict.
        # Should also fail without modifying anything.
        dataId3 = {"instrument": "Cam1", "detector": 3}
        ref3 = registry.findDataset(datasetType, dataId3, collections=run1)
        with self.assertRaises(ConflictingDefinitionError):
            registry.associate(tag1, [ref3, ref2b])
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=tag1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=tag1), ref2)
        self.assertIsNone(registry.findDataset(datasetType, dataId3, collections=tag1))
        # Register a chained collection that searches [tag1, run2]
        chain1 = "chain1"
        registry.registerCollection(chain1, type=CollectionType.CHAINED)
        self.assertIs(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Chained collection exists, but has no collections in it.
        self.assertFalse(registry.getCollectionChain(chain1))
        # If we query for all collections, we should get the chained collection
        # only if we don't ask to flatten it (i.e. yield only its children).
        self.assertEqual(set(registry.queryCollections(flattenChains=False)), {tag1, run1, run2, chain1})
        self.assertEqual(set(registry.queryCollections(flattenChains=True)), {tag1, run1, run2})
        # Attempt to set its child collections to something circular; that
        # should fail.
        with self.assertRaises(ValueError):
            registry.setCollectionChain(chain1, [tag1, chain1])
        # Add the child collections.
        registry.setCollectionChain(chain1, [tag1, run2])
        self.assertEqual(
            list(registry.getCollectionChain(chain1)),
            [tag1, run2]
        )
        # Searching for dataId1 or dataId2 in the chain should return ref1 and
        # ref2, because both are in tag1.
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain1), ref1)
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain1), ref2)
        # Now disassociate ref2 from tag1.  The search (for bias) with
        # dataId2 in chain1 should then:
        # 1. not find it in tag1
        # 2. find a different dataset in run2
        registry.disassociate(tag1, [ref2])
        ref2b = registry.findDataset(datasetType, dataId2, collections=chain1)
        self.assertNotEqual(ref2b, ref2)
        self.assertEqual(ref2b, registry.findDataset(datasetType, dataId2, collections=run2))
        # Define a new chain so we can test recursive chains.
        chain2 = "chain2"
        registry.registerCollection(chain2, type=CollectionType.CHAINED)
        registry.setCollectionChain(chain2, [run2, chain1])
        # Query for collections matching a regex.
        self.assertCountEqual(
            list(registry.queryCollections(re.compile("imported_."), flattenChains=False)),
            ["imported_r", "imported_g"]
        )
        # Query for collections matching a regex or an explicit str.
        self.assertCountEqual(
            list(registry.queryCollections([re.compile("imported_."), "chain1"], flattenChains=False)),
            ["imported_r", "imported_g", "chain1"]
        )
        # Searching for bias with dataId1 should find it via tag1 in chain2,
        # recursing, because it is not in run2.
        self.assertIsNone(registry.findDataset(datasetType, dataId1, collections=run2))
        self.assertEqual(registry.findDataset(datasetType, dataId1, collections=chain2), ref1)
        # Search for bias with dataId2 should find it in run2 (ref2b).
        self.assertEqual(registry.findDataset(datasetType, dataId2, collections=chain2), ref2b)
        # Search for a flat that is in run2.  Because run2 is at the front of
        # chain2, the chained search should find the same dataset as a direct
        # search of run2.
        dataId4 = {"instrument": "Cam1", "detector": 3, "physical_filter": "Cam1-R2"}
        ref4 = registry.findDataset("flat", dataId4, collections=run2)
        self.assertIsNotNone(ref4)
        self.assertEqual(ref4, registry.findDataset("flat", dataId4, collections=chain2))
        # Deleting a collection that's part of a CHAINED collection is not
        # allowed, and is exception-safe.
        with self.assertRaises(Exception):
            registry.removeCollection(run2)
        self.assertEqual(registry.getCollectionType(run2), CollectionType.RUN)
        with self.assertRaises(Exception):
            registry.removeCollection(chain1)
        self.assertEqual(registry.getCollectionType(chain1), CollectionType.CHAINED)
        # Actually remove chain2, test that it's gone by asking for its type.
        registry.removeCollection(chain2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain2)
        # Actually remove run2 and chain1, which should work now.
        registry.removeCollection(chain1)
        registry.removeCollection(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(run2)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(chain1)
        # Remove tag1 as well, just to test that we can remove TAGGED
        # collections.
        registry.removeCollection(tag1)
        with self.assertRaises(MissingCollectionError):
            registry.getCollectionType(tag1)

    def testBasicTransaction(self):
        """Test that all operations within a single transaction block are
        rolled back if an exception propagates out of the block.
        """
        registry = self.makeRegistry()
        storageClass = StorageClass("testDatasetType")
        registry.storageClasses.registerStorageClass(storageClass)
        with registry.transaction():
            registry.insertDimensionData("instrument", {"name": "Cam1", "class_name": "A"})
        with self.assertRaises(ValueError):
            with registry.transaction():
                registry.insertDimensionData("instrument", {"name": "Cam2"})
                raise ValueError("Oops, something went wrong")
        # Cam1 should exist
        self.assertEqual(registry.expandDataId(instrument="Cam1").records["instrument"].class_name, "A")
        # But Cam2 and Cam3 should both not exist
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam2")
        with self.assertRaises(LookupError):
            registry.expandDataId(instrument="Cam3")

    def testNestedTransaction(self):
        """Test that operations within a transaction block are not rolled back
        if an exception propagates out of an inner transaction block and is
        then caught.
        """
        registry = self.makeRegistry()
        dimension = registry.dimensions["instrument"]
        dataId1 = {"instrument": "DummyCam"}
        dataId2 = {"instrument": "DummyCam2"}
        checkpointReached = False
        with registry.transaction():
            # This should be added and (ultimately) committed.
            registry.insertDimensionData(dimension, dataId1)
            with self.assertRaises(sqlalchemy.exc.IntegrityError):
                with registry.transaction(savepoint=True):
                    # This does not conflict, and should succeed (but not
                    # be committed).
                    registry.insertDimensionData(dimension, dataId2)
                    checkpointReached = True
                    # This should conflict and raise, triggering a rollback
                    # of the previous insertion within the same transaction
                    # context, but not the original insertion in the outer
                    # block.
                    registry.insertDimensionData(dimension, dataId1)
        self.assertTrue(checkpointReached)
        self.assertIsNotNone(registry.expandDataId(dataId1, graph=dimension.graph))
        with self.assertRaises(LookupError):
            registry.expandDataId(dataId2, graph=dimension.graph)

    def testInstrumentDimensions(self):
        """Test queries involving only instrument dimensions, with no joins to
        skymap."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test
        registry.insertDimensionData(
            "instrument",
            dict(name="DummyCam", visit_max=25, exposure_max=300, detector_max=6)
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "detector",
            *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
        )
        registry.insertDimensionData(
            "visit_system",
            dict(instrument="DummyCam", id=1, name="default"),
        )
        registry.insertDimensionData(
            "visit",
            dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
            dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
            dict(instrument="DummyCam", id=20, name="twenty", physical_filter="dummy_r", visit_system=1),
        )
        registry.insertDimensionData(
            "exposure",
            dict(instrument="DummyCam", id=100, obs_id="100", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=101, obs_id="101", physical_filter="dummy_i"),
            dict(instrument="DummyCam", id=110, obs_id="110", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=111, obs_id="111", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=200, obs_id="200", physical_filter="dummy_r"),
            dict(instrument="DummyCam", id=201, obs_id="201", physical_filter="dummy_r"),
        )
        registry.insertDimensionData(
            "visit_definition",
            dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
            dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
            dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
            dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
        )
        # dataset types
        run1 = "test1_r"
        run2 = "test2_r"
        tagged2 = "test2_t"
        registry.registerRun(run1)
        registry.registerRun(run2)
        registry.registerCollection(tagged2)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        rawType = DatasetType(name="RAW",
                              dimensions=registry.dimensions.extract(("instrument", "exposure", "detector")),
                              storageClass=storageClass)
        registry.registerDatasetType(rawType)
        calexpType = DatasetType(name="CALEXP",
                                 dimensions=registry.dimensions.extract(("instrument", "visit", "detector")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)

        # add pre-existing datasets
        for exposure in (100, 101, 110, 111):
            for detector in (1, 2, 3):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run1)
                # exposures 100 and 101 appear in both run1 and tagged2.
                # 100 has different datasets in the different collections
                # 101 has the same dataset in both collections.
                if exposure == 100:
                    ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                if exposure in (100, 101):
                    registry.associate(tagged2, [ref])
        # Add pre-existing datasets to tagged2.
        for exposure in (200, 201):
            for detector in (3, 4, 5):
                # note that only 3 of 5 detectors have datasets
                dataId = dict(instrument="DummyCam", exposure=exposure, detector=detector)
                ref, = registry.insertDatasets(rawType, dataIds=[dataId], run=run2)
                registry.associate(tagged2, [ref])

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(rawType.dimensions.required | calexpType.dimensions.required)
        )
        # Test that single dim string works as well as list of str
        rows = registry.queryDataIds("visit", datasets=rawType, collections=run1).expanded().toSet()
        rowsI = registry.queryDataIds(["visit"], datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(rows, rowsI)
        # with empty expression
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1).expanded().toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
            packer1 = registry.dimensions.makePacker("visit_detector", dataId)
            packer2 = registry.dimensions.makePacker("exposure_detector", dataId)
            self.assertEqual(packer1.unpack(packer1.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer1.dimensions))
            self.assertEqual(packer2.unpack(packer2.pack(dataId)),
                             DataCoordinate.standardize(dataId, graph=packer2.dimensions))
            self.assertNotEqual(packer1.pack(dataId), packer2.pack(dataId))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # second collection
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=tagged2).toSet()
        self.assertEqual(len(rows), 4*3)   # 4 exposures times 3 detectors
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # with two input datasets
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=[run1, tagged2]).toSet()
        self.assertEqual(len(set(rows)), 6*3)   # 6 exposures times 3 detectors; set needed to de-dupe
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("instrument", "detector", "exposure", "visit"))
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows),
                              (100, 101, 110, 111, 200, 201))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10, 11, 20))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3, 4, 5))

        # limit to single visit
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

        # more limiting expression, using link names instead of Table.column
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit = 10 and detector > 1").toSet()
        self.assertEqual(len(rows), 2*2)   # 2 exposures times 2 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (100, 101))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (10,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (2, 3))

        # expression excludes everything
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="visit > 1000").toSet()
        self.assertEqual(len(rows), 0)

        # Selecting by physical_filter, this is not in the dimensions, but it
        # is a part of the full expression so it should work too.
        rows = registry.queryDataIds(dimensions, datasets=rawType, collections=run1,
                                     where="physical_filter = 'dummy_r'").toSet()
        self.assertEqual(len(rows), 2*3)   # 2 exposures times 3 detectors
        self.assertCountEqual(set(dataId["exposure"] for dataId in rows), (110, 111))
        self.assertCountEqual(set(dataId["visit"] for dataId in rows), (11,))
        self.assertCountEqual(set(dataId["detector"] for dataId in rows), (1, 2, 3))

    def testSkyMapDimensions(self):
        """Tests involving only skymap dimensions, no joins to instrument."""
        registry = self.makeRegistry()

        # need a bunch of dimensions and datasets for test, we want
        # "band" in the test so also have to add physical_filter
        # dimensions
        registry.insertDimensionData(
            "instrument",
            dict(instrument="DummyCam")
        )
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_r", band="r"),
            dict(instrument="DummyCam", name="dummy_i", band="i"),
        )
        registry.insertDimensionData(
            "skymap",
            dict(name="DummyMap", hash="sha!".encode("utf8"))
        )
        for tract in range(10):
            registry.insertDimensionData("tract", dict(skymap="DummyMap", id=tract))
            registry.insertDimensionData(
                "patch",
                *[dict(skymap="DummyMap", tract=tract, id=patch, cell_x=0, cell_y=0)
                  for patch in range(10)]
            )

        # dataset types
        run = "test"
        registry.registerRun(run)
        storageClass = StorageClass("testDataset")
        registry.storageClasses.registerStorageClass(storageClass)
        calexpType = DatasetType(name="deepCoadd_calexp",
                                 dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                         "band")),
                                 storageClass=storageClass)
        registry.registerDatasetType(calexpType)
        mergeType = DatasetType(name="deepCoadd_mergeDet",
                                dimensions=registry.dimensions.extract(("skymap", "tract", "patch")),
                                storageClass=storageClass)
        registry.registerDatasetType(mergeType)
        measType = DatasetType(name="deepCoadd_meas",
                               dimensions=registry.dimensions.extract(("skymap", "tract", "patch",
                                                                       "band")),
                               storageClass=storageClass)
        registry.registerDatasetType(measType)

        dimensions = DimensionGraph(
            registry.dimensions,
            dimensions=(calexpType.dimensions.required | mergeType.dimensions.required
                        | measType.dimensions.required)
        )

        # add pre-existing datasets
        for tract in (1, 3, 5):
            for patch in (2, 4, 6, 7):
                dataId = dict(skymap="DummyMap", tract=tract, patch=patch)
                registry.insertDatasets(mergeType, dataIds=[dataId], run=run)
                for aFilter in ("i", "r"):
                    dataId = dict(skymap="DummyMap", tract=tract, patch=patch, band=aFilter)
                    registry.insertDatasets(calexpType, dataIds=[dataId], run=run)

        # with empty expression
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run).toSet()
        self.assertEqual(len(rows), 3*4*2)   # 3 tracts x 4 patches x 2 filters
        for dataId in rows:
            self.assertCountEqual(dataId.keys(), ("skymap", "tract", "patch", "band"))
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to 2 tracts and 2 patches
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="tract IN (1, 5) AND patch IN (2, 7)").toSet()
        self.assertEqual(len(rows), 2*2*2)   # 2 tracts x 2 patches x 2 filters
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i", "r"))

        # limit to single filter
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="band = 'i'").toSet()
        self.assertEqual(len(rows), 3*4*1)   # 3 tracts x 4 patches x 1 filter
        self.assertCountEqual(set(dataId["tract"] for dataId in rows), (1, 3, 5))
        self.assertCountEqual(set(dataId["patch"] for dataId in rows), (2, 4, 6, 7))
        self.assertCountEqual(set(dataId["band"] for dataId in rows), ("i",))

        # expression excludes everything; specifying a non-existing skymap is
        # not a fatal error, it's an operator error
        rows = registry.queryDataIds(dimensions,
                                     datasets=[calexpType, mergeType], collections=run,
                                     where="skymap = 'Mars'").toSet()
        self.assertEqual(len(rows), 0)

    def testSpatialJoin(self):
        """Test queries that involve spatial overlap joins.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "hsc-rc2-subset.yaml")

        # Dictionary of spatial DatabaseDimensionElements, keyed by the name
        # of the TopologicalFamily they belong to.  We'll relate all elements
        # in each family to all of the elements in each other family.
        families = defaultdict(set)
        # Dictionary of {element.name: {dataId: region}}.
        regions = {}
        for element in registry.dimensions.getDatabaseElements():
            if element.spatial is not None:
                families[element.spatial.name].add(element)
                regions[element.name] = {
                    record.dataId: record.region for record in registry.queryDimensionRecords(element)
                }

        # If this check fails, it's not necessarily a problem - it may just be
        # a reasonable change to the default dimension definitions - but the
        # test below depends on there being more than one family to do
        # anything useful.
        self.assertEqual(len(families), 2)

        # Overlap DatabaseDimensionElements with each other.
        for family1, family2 in itertools.combinations(families, 2):
            for element1, element2 in itertools.product(families[family1], families[family2]):
                graph = DimensionGraph.union(element1.graph, element2.graph)
                # Construct expected set of overlapping data IDs via a
                # brute-force comparison of the regions we've already fetched.
                expected = {
                    DataCoordinate.standardize(
                        {**dataId1.byName(), **dataId2.byName()},
                        graph=graph
                    )
                    for (dataId1, region1), (dataId2, region2)
                    in itertools.product(regions[element1.name].items(), regions[element2.name].items())
                    if not region1.isDisjointFrom(region2)
                }
                self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
                queried = set(registry.queryDataIds(graph))
                self.assertEqual(expected, queried)

        # Overlap each DatabaseDimensionElement with the commonSkyPix system.
        commonSkyPix = registry.dimensions.commonSkyPix
        for elementName, theseRegions in regions.items():
            graph = DimensionGraph.union(registry.dimensions[elementName].graph, commonSkyPix.graph)
            expected = set()
            for dataId, region in theseRegions.items():
                for begin, end in commonSkyPix.pixelization.envelope(region):
                    expected.update(
                        DataCoordinate.standardize(
                            {commonSkyPix.name: index, **dataId.byName()},
                            graph=graph
                        )
                        for index in range(begin, end)
                    )
            self.assertGreater(len(expected), 2, msg="Test that we aren't just comparing empty sets.")
            queried = set(registry.queryDataIds(graph))
            self.assertEqual(expected, queried)

    def testAbstractQuery(self):
        """Test that we can run a query that just lists the known
        bands.  This is tricky because band is
        backed by a query against physical_filter.
        """
        registry = self.makeRegistry()
        registry.insertDimensionData("instrument", dict(name="DummyCam"))
        registry.insertDimensionData(
            "physical_filter",
            dict(instrument="DummyCam", name="dummy_i", band="i"),
            dict(instrument="DummyCam", name="dummy_i2", band="i"),
            dict(instrument="DummyCam", name="dummy_r", band="r"),
        )
        rows = registry.queryDataIds(["band"]).toSet()
        self.assertCountEqual(
            rows,
            [DataCoordinate.standardize(band="i", universe=registry.dimensions),
             DataCoordinate.standardize(band="r", universe=registry.dimensions)]
        )

    def testAttributeManager(self):
        """Test basic functionality of attribute manager.
        """
        # number of attributes with schema versions in a fresh database,
        # 6 managers with 3 records per manager, plus config for dimensions
        VERSION_COUNT = 6 * 3 + 1

        registry = self.makeRegistry()
        attributes = registry._attributes

        # check what get() returns for non-existing key
        self.assertIsNone(attributes.get("attr"))
        self.assertEqual(attributes.get("attr", ""), "")
        self.assertEqual(attributes.get("attr", "Value"), "Value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # cannot store empty key or value
        with self.assertRaises(ValueError):
            attributes.set("", "value")
        with self.assertRaises(ValueError):
            attributes.set("attr", "")

        # set value of non-existing key
        attributes.set("attr", "value")
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value")

        # update value of existing key
        with self.assertRaises(ButlerAttributeExistsError):
            attributes.set("attr", "value2")

        attributes.set("attr", "value2", force=True)
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT + 1)
        self.assertEqual(attributes.get("attr"), "value2")

        # delete existing key
        self.assertTrue(attributes.delete("attr"))
        self.assertEqual(len(list(attributes.items())), VERSION_COUNT)

        # delete non-existing key
        self.assertFalse(attributes.delete("non-attr"))

        # store bunch of keys and get the list back
        data = [
            ("version.core", "1.2.3"),
            ("version.dimensions", "3.2.1"),
            ("config.managers.opaque", "ByNameOpaqueTableStorageManager"),
        ]
        for key, value in data:
            attributes.set(key, value)
        items = dict(attributes.items())
        for key, value in data:
            self.assertEqual(items[key], value)

    def testQueryDatasetsDeduplication(self):
        """Test that the findFirst option to queryDatasets selects datasets
        from collections in the order given.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"])),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_g", "imported_r"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )
        self.assertCountEqual(
            list(registry.queryDatasets("bias", collections=["imported_r", "imported_g"],
                                        findFirst=True)),
            [
                registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g"),
                registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r"),
                registry.findDataset("bias", instrument="Cam1", detector=4, collections="imported_r"),
            ]
        )

    def testQueryResults(self):
        """Test querying for data IDs and then manipulating the QueryResults
        object returned to perform other queries.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        bias = registry.getDatasetType("bias")
        flat = registry.getDatasetType("flat")
        # Obtain expected results from methods other than those we're testing
        # here.  That includes:
        # - the dimensions of the data IDs we want to query:
        expectedGraph = DimensionGraph(registry.dimensions, names=["detector", "physical_filter"])
        # - the dimensions of some other data IDs we'll extract from that:
        expectedSubsetGraph = DimensionGraph(registry.dimensions, names=["detector"])
        # - the data IDs we expect to obtain from the first queries:
        expectedDataIds = DataCoordinateSet(
            {
                DataCoordinate.standardize(instrument="Cam1", detector=d, physical_filter=p,
                                           universe=registry.dimensions)
                for d, p in itertools.product({1, 2, 3}, {"Cam1-G", "Cam1-R1", "Cam1-R2"})
            },
            graph=expectedGraph,
            hasFull=False,
            hasRecords=False,
        )
        # - the flat datasets we expect to find from those data IDs, in just
        #   one collection (so deduplication is irrelevant):
        expectedFlats = [
            registry.findDataset(flat, instrument="Cam1", detector=1, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=2, physical_filter="Cam1-R1",
                                 collections="imported_r"),
            registry.findDataset(flat, instrument="Cam1", detector=3, physical_filter="Cam1-R2",
                                 collections="imported_r"),
        ]
        # - the data IDs we expect to extract from that:
        expectedSubsetDataIds = expectedDataIds.subset(expectedSubsetGraph)
        # - the bias datasets we expect to find from those data IDs, after we
        #   subset-out the physical_filter dimension, both with duplicates:
        expectedAllBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # - ...and without duplicates:
        expectedDeduplicatedBiases = [
            registry.findDataset(bias, instrument="Cam1", detector=1, collections="imported_g"),
            registry.findDataset(bias, instrument="Cam1", detector=2, collections="imported_r"),
            registry.findDataset(bias, instrument="Cam1", detector=3, collections="imported_r"),
        ]
        # Test against those expected results, using a "lazy" query for the
        # data IDs (which re-executes that query each time we use it to do
        # something new).
        dataIds = registry.queryDataIds(
            ["detector", "physical_filter"],
            where="detector.purpose = 'SCIENCE'",  # this rejects detector=4
        )
        self.assertEqual(dataIds.graph, expectedGraph)
        self.assertEqual(dataIds.toSet(), expectedDataIds)
        self.assertCountEqual(
            list(
                dataIds.findDatasets(
                    flat,
                    collections=["imported_r"],
                )
            ),
            expectedFlats,
        )
        subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
        self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
        self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=False
                )
            ),
            expectedAllBiases
        )
        self.assertCountEqual(
            list(
                subsetDataIds.findDatasets(
                    bias,
                    collections=["imported_r", "imported_g"],
                    findFirst=True
                )
            ), expectedDeduplicatedBiases
        )
        # Materialize the bias dataset queries (only) by putting the results
        # into temporary tables, then repeat those tests.
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=False).materialize() as biases:
            self.assertCountEqual(list(biases), expectedAllBiases)
        with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                        findFirst=True).materialize() as biases:
            self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the data ID subset query, but not the dataset queries.
        with subsetDataIds.materialize() as subsetDataIds:
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize the dataset queries, too.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
        # Materialize the original query, but none of the follow-up queries.
        with dataIds.materialize() as dataIds:
            self.assertEqual(dataIds.graph, expectedGraph)
            self.assertEqual(dataIds.toSet(), expectedDataIds)
            self.assertCountEqual(
                list(
                    dataIds.findDatasets(
                        flat,
                        collections=["imported_r"],
                    )
                ),
                expectedFlats,
            )
            subsetDataIds = dataIds.subset(expectedSubsetGraph, unique=True)
            self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
            self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=False
                    )
                ),
                expectedAllBiases
            )
            self.assertCountEqual(
                list(
                    subsetDataIds.findDatasets(
                        bias,
                        collections=["imported_r", "imported_g"],
                        findFirst=True
                    )
                ), expectedDeduplicatedBiases
            )
            # Materialize just the bias dataset queries.
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=False).materialize() as biases:
                self.assertCountEqual(list(biases), expectedAllBiases)
            with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                            findFirst=True).materialize() as biases:
                self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
            # Materialize the subset data ID query, but not the dataset
            # queries.
            with subsetDataIds.materialize() as subsetDataIds:
                self.assertEqual(subsetDataIds.graph, expectedSubsetGraph)
                self.assertEqual(subsetDataIds.toSet(), expectedSubsetDataIds)
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=False
                        )
                    ),
                    expectedAllBiases
                )
                self.assertCountEqual(
                    list(
                        subsetDataIds.findDatasets(
                            bias,
                            collections=["imported_r", "imported_g"],
                            findFirst=True
                        )
                    ), expectedDeduplicatedBiases
                )
                # Materialize the bias dataset queries, too, so now we're
                # materializing every single step.
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=False).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedAllBiases)
                with subsetDataIds.findDatasets(bias, collections=["imported_r", "imported_g"],
                                                findFirst=True).materialize() as biases:
                    self.assertCountEqual(list(biases), expectedDeduplicatedBiases)
1276 def testEmptyDimensionsQueries(self):
1277 """Test Query and QueryResults objects in the case where there are no
1278 dimensions.
1279 """
1280 # Set up test data: one dataset type, two runs, one dataset in each.
1281 registry = self.makeRegistry()
1282 self.loadData(registry, "base.yaml")
1283 schema = DatasetType("schema", dimensions=registry.dimensions.empty, storageClass="Catalog")
1284 registry.registerDatasetType(schema)
1285 dataId = DataCoordinate.makeEmpty(registry.dimensions)
1286 run1 = "run1"
1287 run2 = "run2"
1288 registry.registerRun(run1)
1289 registry.registerRun(run2)
1290 (dataset1,) = registry.insertDatasets(schema, dataIds=[dataId], run=run1)
1291 (dataset2,) = registry.insertDatasets(schema, dataIds=[dataId], run=run2)
        # Query directly for both of the datasets together, and then for each
        # one at a time via findFirst.
        self.assertCountEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2]
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(registry.queryDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Query for data IDs with no dimensions.
        dataIds = registry.queryDataIds([])
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        # Use the queried data IDs to find the datasets.
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        # Now materialize the data ID query results and repeat those tests.
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then subset that to get the empty one.
        # Repeat the above tests starting from that.
        dataIds = registry.queryDataIds(["instrument"]).subset(registry.dimensions.empty, unique=True)
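        # ``subset(..., unique=True)`` collapses every instrument data ID down
        # to the single empty data ID and deduplicates, so the assertions
        # below should match the direct empty-dimensions query above.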
        self.assertEqual(
            dataIds.toSequence(),
            DataCoordinateSequence([dataId], registry.dimensions.empty)
        )
        self.assertCountEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
            [dataset1, dataset2],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
            [dataset1],
        )
        self.assertEqual(
            list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
            [dataset2],
        )
        with dataIds.materialize() as dataIds:
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
        # Query for non-empty data IDs, then materialize, then subset to get
        # the empty one.  Repeat again.
        with registry.queryDataIds(["instrument"]).materialize() as nonEmptyDataIds:
            dataIds = nonEmptyDataIds.subset(registry.dimensions.empty, unique=True)
            self.assertEqual(
                dataIds.toSequence(),
                DataCoordinateSequence([dataId], registry.dimensions.empty)
            )
            self.assertCountEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                [dataset1, dataset2],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                [dataset1],
            )
            self.assertEqual(
                list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                [dataset2],
            )
            with dataIds.materialize() as dataIds:
                self.assertEqual(
                    dataIds.toSequence(),
                    DataCoordinateSequence([dataId], registry.dimensions.empty)
                )
                self.assertCountEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=False)),
                    [dataset1, dataset2],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run1, run2], findFirst=True)),
                    [dataset1],
                )
                self.assertEqual(
                    list(dataIds.findDatasets(schema, collections=[run2, run1], findFirst=True)),
                    [dataset2],
                )

    def testCalibrationCollections(self):
        """Test operations on `~CollectionType.CALIBRATION` collections,
        including `Registry.certify`, `Registry.decertify`, and
        `Registry.findDataset`.
        """
        # Setup: make a Registry and fill it with some datasets in
        # non-calibration collections.
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")
        # Set up some timestamps.
        t1 = astropy.time.Time('2020-01-01T01:00:00', format="isot", scale="tai")
        t2 = astropy.time.Time('2020-01-01T02:00:00', format="isot", scale="tai")
        t3 = astropy.time.Time('2020-01-01T03:00:00', format="isot", scale="tai")
        t4 = astropy.time.Time('2020-01-01T04:00:00', format="isot", scale="tai")
        t5 = astropy.time.Time('2020-01-01T05:00:00', format="isot", scale="tai")
        allTimespans = [
            Timespan(a, b) for a, b in itertools.combinations([None, t1, t2, t3, t4, t5, None], r=2)
        ]
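        # The duplicated ``None`` endpoint above is deliberate: it makes
        # ``itertools.combinations`` emit half-unbounded and fully unbounded
        # intervals in addition to the finite ones, e.g.
        #     Timespan(None, t1), Timespan(t1, t2), ..., Timespan(t5, None),
        # and Timespan(None, None).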
        # Get references to some datasets.
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Register the main calibration collection we'll be working with.
        collection = "Cam1/calibs/default"
        registry.registerCollection(collection, type=CollectionType.CALIBRATION)
        # Cannot associate into a calibration collection (no timespan).
        with self.assertRaises(TypeError):
            registry.associate(collection, [bias2a])
        # Certify the 2a dataset with [t2, t4) validity.
        registry.certify(collection, [bias2a], Timespan(begin=t2, end=t4))
        # We should not be able to certify 2b with anything overlapping that
        # window.
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=None, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t1, end=None))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t3))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=t5))
        with self.assertRaises(ConflictingDefinitionError):
            registry.certify(collection, [bias2b], Timespan(begin=t2, end=None))
        # We should be able to certify 3a with a range overlapping that
        # window, because it's for a different detector.
        # We'll certify 3a over [t1, t3).
        registry.certify(collection, [bias3a], Timespan(begin=t1, end=t3))
        # Now we'll certify 2b and 3b together over [t4, ∞).
        registry.certify(collection, [bias2b, bias3b], Timespan(begin=t4, end=None))
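        # Validity ranges certified so far (detectors 2 and 3, respectively):
        #     bias2a: [t2, t4)    bias3a: [t1, t3)
        #     bias2b: [t4, ∞)     bias3b: [t4, ∞)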

        # Fetch all associations and check that they are what we expect.
        self.assertCountEqual(
            list(
                registry.queryDatasetAssociations(
                    "bias",
                    collections=[collection, "imported_g", "imported_r"],
                )
            ),
            [
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=1,
                                             collections="imported_g"),
                    collection="imported_g",
                    timespan=None,
                ),
                DatasetAssociation(
                    ref=registry.findDataset("bias", instrument="Cam1", detector=4,
                                             collections="imported_r"),
                    collection="imported_r",
                    timespan=None,
                ),
                DatasetAssociation(ref=bias2a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias3a, collection="imported_g", timespan=None),
                DatasetAssociation(ref=bias2b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias3b, collection="imported_r", timespan=None),
                DatasetAssociation(ref=bias2a, collection=collection, timespan=Timespan(begin=t2, end=t4)),
                DatasetAssociation(ref=bias3a, collection=collection, timespan=Timespan(begin=t1, end=t3)),
                DatasetAssociation(ref=bias2b, collection=collection, timespan=Timespan(begin=t4, end=None)),
                DatasetAssociation(ref=bias3b, collection=collection, timespan=Timespan(begin=t4, end=None)),
            ]
        )

        class Ambiguous:
            """Tag class to denote lookups that are expected to be ambiguous.
            """
            pass

        def assertLookup(detector: int, timespan: Timespan,
                         expected: Optional[Union[DatasetRef, Type[Ambiguous]]]) -> None:
            """Local function that asserts that a bias lookup returns the
            given expected result.
            """
            if expected is Ambiguous:
                with self.assertRaises(RuntimeError):
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
            else:
                self.assertEqual(
                    expected,
                    registry.findDataset("bias", collections=collection, instrument="Cam1",
                                         detector=detector, timespan=timespan)
                )
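
        # assertLookup encodes the findDataset contract for calibration
        # collections: a lookup timespan overlapping no certification returns
        # None, one overlapping exactly one certification returns that
        # dataset, and one spanning two different certifications for the same
        # data ID is ambiguous and raises.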

        # Systematically test lookups against expected results.
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify [t3, t5) for all data IDs, and do the test lookups again.
        # This should truncate bias2a to [t2, t3), leave bias3a unchanged at
        # [t1, t3), and truncate bias2b and bias3b to [t5, ∞).
        registry.decertify(collection=collection, datasetType="bias", timespan=Timespan(t3, t5))
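        # Validity ranges are now:
        #     bias2a: [t2, t3)    bias3a: [t1, t3)
        #     bias2b: [t5, ∞)     bias3b: [t5, ∞)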
        assertLookup(detector=2, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(None, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t1, t2), expected=None)
        assertLookup(detector=2, timespan=Timespan(t1, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t2, t3), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t4), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, t5), expected=bias2a)
        assertLookup(detector=2, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=2, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t3, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=2, timespan=Timespan(t4, None), expected=bias2b)
        assertLookup(detector=2, timespan=Timespan(t5, None), expected=bias2b)
        assertLookup(detector=3, timespan=Timespan(None, t1), expected=None)
        assertLookup(detector=3, timespan=Timespan(None, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(None, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t1, t2), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t1, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t2, t3), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t4), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, t5), expected=bias3a)
        assertLookup(detector=3, timespan=Timespan(t2, None), expected=Ambiguous)
        assertLookup(detector=3, timespan=Timespan(t3, t4), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t3, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t4, t5), expected=None)
        assertLookup(detector=3, timespan=Timespan(t4, None), expected=bias3b)
        assertLookup(detector=3, timespan=Timespan(t5, None), expected=bias3b)

        # Decertify everything, this time with explicit data IDs, then check
        # that no lookups succeed.
        registry.decertify(
            collection, "bias", Timespan(None, None),
            dataIds=[
                dict(instrument="Cam1", detector=2),
                dict(instrument="Cam1", detector=3),
            ]
        )
        for detector in (2, 3):
            for timespan in allTimespans:
                assertLookup(detector=detector, timespan=timespan, expected=None)
        # Certify bias2a and bias3a over (-∞, ∞) and check that all lookups
        # return those.
        registry.certify(collection, [bias2a, bias3a], Timespan(None, None))
        for timespan in allTimespans:
            assertLookup(detector=2, timespan=timespan, expected=bias2a)
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
        # Decertify just bias2 over [t2, t4).
        # This should split the single certification row into two (and leave
        # the other existing row, for bias3a, alone).
        registry.decertify(collection, "bias", Timespan(t2, t4),
                           dataIds=[dict(instrument="Cam1", detector=2)])
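        # bias2a's single (-∞, ∞) certification should now be two rows,
        # (-∞, t2) and [t4, ∞); a lookup timespan that overlaps both of them
        # is ambiguous, one that overlaps exactly one finds bias2a, and one
        # that falls entirely inside [t2, t4) finds nothing.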
        for timespan in allTimespans:
            assertLookup(detector=3, timespan=timespan, expected=bias3a)
            overlapsBefore = timespan.overlaps(Timespan(None, t2))
            overlapsAfter = timespan.overlaps(Timespan(t4, None))
            if overlapsBefore and overlapsAfter:
                expected = Ambiguous
            elif overlapsBefore or overlapsAfter:
                expected = bias2a
            else:
                expected = None
            assertLookup(detector=2, timespan=timespan, expected=expected)

    def testIngestTimeQuery(self):
        """Test that ``where`` expressions can filter datasets on their
        ingest date.
        """
        registry = self.makeRegistry()
        self.loadData(registry, "base.yaml")
        self.loadData(registry, "datasets.yaml")

        datasets = list(registry.queryDatasets(..., collections=...))
        len0 = len(datasets)
        self.assertGreater(len0, 0)
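
        # A cut below all ingest dates should change nothing; the T'...' form
        # is the query-expression syntax for a time literal, and every test
        # dataset was ingested just now, well after 2000-01-01.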
        where = "ingest_date > T'2000-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len1 = len(datasets)
        self.assertEqual(len0, len1)

        # No one will ever use this piece of software in 30 years, so a
        # 2050 cut-off should match nothing.
        where = "ingest_date > T'2050-01-01'"
        datasets = list(registry.queryDatasets(..., collections=..., where=where))
        len2 = len(datasets)
        self.assertEqual(len2, 0)