Coverage for tests/test_simpleButler.py: 15%
286 statements
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:27 -0700
« prev ^ index » next coverage.py v6.4, created at 2022-05-24 02:27 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import json
25import os
26import re
27import tempfile
28import unittest
29import uuid
30from typing import Any
32try:
33 import numpy as np
34except ImportError:
35 np = None
37import astropy.time
38from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetRef, DatasetType, Registry, Timespan
39from lsst.daf.butler.registry import ConflictingDefinitionError, RegistryConfig, RegistryDefaults
40from lsst.daf.butler.tests import DatastoreMock
41from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
43TESTDIR = os.path.abspath(os.path.dirname(__file__))
class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified name of the dataset-record storage manager class to
    # configure in the registry; subclasses override this to exercise the
    # UUID-based manager instead.
    datasetsManager = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManager"
    # Name of the YAML export file (under tests/data/registry) used to import
    # datasets into the test repository.
    datasetsImportFile = "datasets.yaml"
    # Python type of the dataset IDs this manager produces (int here;
    # uuid.UUID in the subclasses below).
    datasetsIdType = int
    def setUp(self):
        # Fresh temporary root directory per test so each registry is isolated.
        self.root = makeTestTempDir(TESTDIR)
    def tearDown(self):
        # Remove the per-test temporary directory and all of its contents.
        removeTestTempDir(self.root)
62 def makeButler(self, **kwargs: Any) -> Butler:
63 """Return new Butler instance on each call."""
64 config = ButlerConfig()
66 # make separate temporary directory for registry of this instance
67 tmpdir = tempfile.mkdtemp(dir=self.root)
68 config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
69 config["registry", "managers", "datasets"] = self.datasetsManager
70 config["root"] = self.root
72 # have to make a registry first
73 registryConfig = RegistryConfig(config.get("registry"))
74 Registry.createFromConfig(registryConfig)
76 butler = Butler(config, **kwargs)
77 DatastoreMock.apply(butler)
78 return butler
80 def comparableRef(self, ref: DatasetRef) -> DatasetRef:
81 """Return a DatasetRef that can be compared to a DatasetRef from
82 other repository.
84 For repositories that do not support round-trip of ID values this
85 method returns unresolved DatasetRef, for round-trip-safe repos it
86 returns unchanged ref.
87 """
88 return ref if self.datasetsIdType is uuid.UUID else ref.unresolved()
    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test.  We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            set(record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")),
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        # Spot-check a handful of known (visit, physical_filter) pairs that
        # must be present in the imported dimension data.
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )
    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            # Both repositories should now hold equivalent dataset content,
            # and every ID must be of the type the configured manager uses.
            datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
            datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in datasets1],
                [self.comparableRef(ref) for ref in datasets2],
            )
    def testComponentExport(self):
        """Test exporting component datasets and then importing them.

        This test intentionally does not depend on whether just the component
        is exported and then imported vs. the full composite dataset, because
        I don't want it to assume more than it needs to about the
        implementation.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets (restricted to the "flat.psf" component).
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets("flat.psf", collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            # The component datasets must round-trip with correctly-typed IDs.
            datasets1 = list(butler1.registry.queryDatasets("flat.psf", collections=...))
            datasets2 = list(butler2.registry.queryDatasets("flat.psf", collections=...))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
            self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in datasets1],
                [self.comparableRef(ref) for ref in datasets2],
            )
190 def testImportTwice(self):
191 """Test exporting dimension records and datasets from a repo and then
192 importing them all back in again twice.
193 """
194 if self.datasetsIdType is not uuid.UUID:
195 self.skipTest("This test can only work for UUIDs")
196 # Import data to play with.
197 butler1 = self.makeButler(writeable=True)
198 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
199 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
200 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
201 # Export all datasets.
202 with butler1.export(filename=file.name) as exporter:
203 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
204 butler2 = self.makeButler(writeable=True)
205 # Import it once.
206 butler2.import_(filename=file.name)
207 # Import it again
208 butler2.import_(filename=file.name)
209 datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
210 datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
211 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets1))
212 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets2))
213 self.assertCountEqual(
214 [self.comparableRef(ref) for ref in datasets1],
215 [self.comparableRef(ref) for ref in datasets2],
216 )
218 def testDatasetImportReuseIds(self):
219 """Test for import that should preserve dataset IDs.
221 This test assumes that dataset IDs in datasets YAML are different from
222 what auto-incremental insert would produce.
223 """
224 if self.datasetsIdType is not int:
225 self.skipTest("This test can only work for UUIDs")
226 # Import data to play with.
227 butler = self.makeButler(writeable=True)
228 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
229 filename = os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)
230 butler.import_(filename=filename, reuseIds=True)
231 datasets = list(butler.registry.queryDatasets(..., collections=...))
232 self.assertTrue(all(isinstance(ref.id, self.datasetsIdType) for ref in datasets))
233 # IDs are copied from YAML, list needs to be updated if file contents
234 # is changed.
235 self.assertCountEqual(
236 [ref.id for ref in datasets],
237 [1001, 1002, 1003, 1010, 1020, 1030, 2001, 2002, 2003, 2010, 2020, 2030, 2040],
238 )
240 # Try once again, it will raise
241 with self.assertRaises(ConflictingDefinitionError):
242 butler.import_(filename=filename, reuseIds=True)
    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections: one of each type, including two CHAINED
        # collections where chain1 refers to chain2.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        # Certify biases over a mix of bounded, open-ended, and empty
        # validity ranges to exercise all Timespan shapes.
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            registry2 = butler2.registry
            # Check that it all round-tripped, starting with the collections
            # themselves.
            self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
            self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
            self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
            self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
            self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
            self.assertEqual(
                list(registry2.getCollectionChain("chain1")),
                ["tag1", "run1", "chain2"],
            )
            self.assertEqual(
                list(registry2.getCollectionChain("chain2")),
                ["calibration1", "run1"],
            )
            # Check that tag collection contents are the same.
            self.maxDiff = None
            self.assertCountEqual(
                [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
                [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
            )
            # Check that calibration collection contents (ref + validity
            # timespan pairs) are the same.
            self.assertCountEqual(
                [
                    (self.comparableRef(assoc.ref), assoc.timespan)
                    for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
                ],
                [
                    (self.comparableRef(assoc.ref), assoc.timespan)
                    for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
                ],
            )
321 def testButlerGet(self):
322 """Test that butler.get can work with different variants."""
324 # Import data to play with.
325 butler = self.makeButler(writeable=True)
326 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
327 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
329 # Find the DatasetRef for a flat
330 coll = "imported_g"
331 flat2g = butler.registry.findDataset(
332 "flat", instrument="Cam1", detector=2, physical_filter="Cam1-G", collections=coll
333 )
335 # Create a numpy integer to check that works fine
336 detector_np = np.int64(2) if np else 2
337 print(type(detector_np))
339 # Try to get it using different variations of dataId + keyword
340 # arguments
341 # Note that instrument.class_name does not work
342 variants = (
343 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
344 (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
345 ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
346 ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
347 ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
348 ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
349 ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
350 (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
351 (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
352 ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
353 ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
354 (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
355 (
356 {"detector.name_in_raft": "b", "detector.raft": "A"},
357 {"instrument": "Cam1", "physical_filter": "Cam1-G"},
358 ),
359 (
360 {
361 "detector.name_in_raft": "b",
362 "detector.raft": "A",
363 "instrument": "Cam1",
364 "physical_filter": "Cam1-G",
365 },
366 {},
367 ),
368 # Duplicate (but valid) information.
369 (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
370 ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
371 ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
372 ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
373 )
375 for dataId, kwds in variants:
376 try:
377 flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
378 except Exception as e:
379 raise type(e)(f"{str(e)}: dataId={dataId}, kwds={kwds}") from e
380 self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")
382 # Check that bad combinations raise.
383 variants = (
384 # Inconsistent detector information.
385 (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
386 ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
387 ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
388 ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
389 ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
390 # Under-specified.
391 ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
392 # Spurious kwargs.
393 (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
394 ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
395 )
396 for dataId, kwds in variants:
397 with self.assertRaises(ValueError):
398 butler.get("flat", dataId=dataId, collections=coll, **kwds)
    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data: exposure 3 falls in [t1, t2),
        # exposure 4 in [t2, t3), so each resolves a different bias.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem
        # (day_obs/seq_num belong to exposure 4, not exposure 3).
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )
547 def testRegistryDefaults(self):
548 """Test that we can default the collections and some data ID keys when
549 constructing a butler.
551 Many tests that use default run already exist in ``test_butler.py``, so
552 that isn't tested here. And while most of this functionality is
553 implemented in `Registry`, we test it here instead of
554 ``daf/butler/tests/registry.py`` because it shouldn't depend on the
555 database backend at all.
556 """
557 butler = self.makeButler(writeable=True)
558 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
559 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
560 # Need to actually set defaults later, not at construction, because
561 # we need to import the instrument before we can use it as a default.
562 # Don't set a default instrument value for data IDs, because 'Cam1'
563 # should be inferred by virtue of that being the only value in the
564 # input collections.
565 butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
566 # Use findDataset without collections or instrument.
567 ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")
568 # Do the same with Butler.get; this should ultimately invoke a lot of
569 # the same code, so it's a bit circular, but mostly we're checking that
570 # it works at all.
571 dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
572 self.assertEqual(ref.id, dataset_id)
573 # Query for datasets. Test defaulting the data ID in both kwargs and
574 # in the WHERE expression.
575 queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
576 self.assertEqual({ref}, queried_refs_1)
577 queried_refs_2 = set(
578 butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
579 )
580 self.assertEqual({ref}, queried_refs_2)
581 # Query for data IDs with a dataset constraint.
582 queried_data_ids = set(
583 butler.registry.queryDataIds(
584 {"instrument", "detector", "physical_filter"},
585 datasets={"flat"},
586 detector=2,
587 physical_filter="Cam1-G",
588 )
589 )
590 self.assertEqual({ref.dataId}, queried_data_ids)
591 # Add another instrument to the repo, and a dataset that uses it to
592 # the `imported_g` collection.
593 butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
594 camera = DatasetType(
595 "camera",
596 dimensions=butler.registry.dimensions["instrument"].graph,
597 storageClass="Camera",
598 )
599 butler.registry.registerDatasetType(camera)
600 butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
601 # Initialize a new butler with `imported_g` as its default run.
602 # This should not have a default instrument, because there are two.
603 # Pass run instead of collections; this should set both.
604 butler2 = Butler(butler=butler, run="imported_g")
605 self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
606 self.assertEqual(butler2.registry.defaults.run, "imported_g")
607 self.assertFalse(butler2.registry.defaults.dataId)
608 # Initialize a new butler with an instrument default explicitly given.
609 # Set collections instead of run, which should then be None.
610 butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2")
611 self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
612 self.assertIsNone(butler3.registry.defaults.run, None)
613 self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"})
    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry;
                # minimal form requires a registry to reconstruct.
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.registry.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")
646 def testJsonDimensionRecordsAndHtmlRepresentation(self):
647 # Dimension Records
648 butler = self.makeButler(writeable=True)
649 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
651 for dimension in ("detector", "visit"):
652 records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
653 for r in records:
654 for minimal in (True, False):
655 json_str = r.to_json(minimal=minimal)
656 r_json = type(r).from_json(json_str, registry=butler.registry)
657 self.assertEqual(r_json, r)
658 # check with direct method
659 simple = r.to_simple()
660 fromDirect = type(simple).direct(**json.loads(json_str))
661 self.assertEqual(simple, fromDirect)
662 # Also check equality of each of the components as dicts
663 self.assertEqual(r_json.toDict(), r.toDict())
665 # check the html representation of records
666 r_html = r._repr_html_()
667 self.assertTrue(isinstance(r_html, str))
668 self.assertIn(dimension, r_html)
670 def testWildcardQueries(self):
671 """Test that different collection type queries work."""
673 # Import data to play with.
674 butler = self.makeButler(writeable=True)
675 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
677 # Create some collections
678 created = {"collection", "u/user/test", "coll3"}
679 for collection in created:
680 butler.registry.registerCollection(collection, type=CollectionType.RUN)
682 collections = butler.registry.queryCollections()
683 self.assertEqual(set(collections), created)
685 expressions = (
686 ("collection", {"collection"}),
687 (..., created),
688 ("*", created),
689 (("collection", "*"), created),
690 ("u/*", {"u/user/test"}),
691 (re.compile("u.*"), {"u/user/test"}),
692 (re.compile(".*oll.*"), {"collection", "coll3"}),
693 ("*oll*", {"collection", "coll3"}),
694 ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
695 ("*[0-9]", {"coll3"}),
696 )
697 for expression, expected in expressions:
698 result = butler.registry.queryCollections(expression)
699 self.assertEqual(set(result), expected)
class SimpleButlerUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with UUIDs.
    """

    # UUID-producing dataset-record storage manager.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Export file whose dataset entries carry explicit UUIDs.
    datasetsImportFile = "datasets-uuid.yaml"
    # Dataset IDs round-trip as UUIDs with this manager.
    datasetsIdType = uuid.UUID
class SimpleButlerMixedUUIDTestCase(SimpleButlerTestCase):
    """Same as SimpleButlerTestCase but uses UUID-based datasets manager and
    loads datasets from YAML file with integer IDs.
    """

    # UUID-producing dataset-record storage manager...
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # ...fed by an export file whose dataset entries use integer IDs, so new
    # UUIDs are generated on import.
    datasetsImportFile = "datasets.yaml"
    datasetsIdType = uuid.UUID
# Allow running this test module directly with `python test_simpleButler.py`.
if __name__ == "__main__":
    unittest.main()