Coverage report for tests/test_simpleButler.py: 10% of 263 statements covered.
Generated by coverage.py v7.4.0 at 2024-01-16 10:44 +0000.
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30import json
31import os
32import re
33import tempfile
34import unittest
35from typing import Any
37try:
38 import numpy as np
39except ImportError:
40 np = None
42import astropy.time
43from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetId, DatasetRef, DatasetType, Timespan
44from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
45from lsst.daf.butler.tests import DatastoreMock
46from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
# Absolute path to the directory containing this test module; used below to
# locate export files under ``data/registry``.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified name of the dataset record storage manager under test.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Export file (under data/registry) used to seed datasets in these tests.
    datasetsImportFile = "datasets.yaml"

    def setUp(self):
        # Fresh temporary root per test; removed again in tearDown.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs : `~typing.Any`
            Forwarded to `Butler.from_config` (e.g. ``writeable=True``).

        Returns
        -------
        butler : `Butler`
            A butler backed by a per-call SQLite registry and a mocked
            datastore, so no real files are ever written.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config()

        butler = Butler.from_config(config, **kwargs)
        # Replace the datastore with a mock so get() returns (id, ...) tuples
        # instead of real datasets.
        DatastoreMock.apply(butler)
        return butler

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # delete=False keeps the file available for the two imports below;
            # without an explicit cleanup the file would be leaked in the
            # system temp directory, so remove it when the test finishes.
            self.addCleanup(os.unlink, file.name)
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            butler2 = self.makeButler(writeable=True)
            # Import it once.
            butler2.import_(filename=file.name)
            # Import it again
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Attach the failing variant to the exception for easier
                # debugging of parameterized failures.
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here. And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler.from_config(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        # The second positional argument of assertIsNone is ``msg``, so the
        # previous ``assertIsNone(x, None)`` form passed a useless message.
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        # NOTE(review): the butler is passed explicitly as the first argument
        # even though this looks like a bound method — confirm against the
        # private helper's signature.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records and check that their
        HTML representation mentions the dimension name.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                    # check the html representation of records
                    r_html = r._repr_html_()
                    self.assertTrue(isinstance(r_html, str))
                    self.assertIn(dimension, r_html)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)
# Allow running this test module directly (e.g. ``python test_simpleButler.py``).
if __name__ == "__main__":
    unittest.main()