Coverage for tests/test_simpleButler.py: 10%
285 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-26 02:48 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30import json
31import os
32import re
33import tempfile
34import unittest
35from typing import Any
37try:
38 import numpy as np
39except ImportError:
40 np = None
42import astropy.time
43from lsst.daf.butler import (
44 Butler,
45 ButlerConfig,
46 CollectionType,
47 DataCoordinate,
48 DatasetId,
49 DatasetRef,
50 DatasetType,
51 StorageClass,
52 Timespan,
53)
54from lsst.daf.butler.datastore.file_templates import FileTemplate
55from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
56from lsst.daf.butler.tests import DatastoreMock
57from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
# Absolute path to the directory containing this test module; used to locate
# the export files under ``data/registry`` that seed test repositories.
TESTDIR = os.path.abspath(os.path.dirname(__file__))
class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Fully-qualified name of the dataset record storage manager class used by
    # the registries these tests create.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    # Export file (relative to data/registry) used to pre-load datasets.
    datasetsImportFile = "datasets.yaml"

    def setUp(self) -> None:
        # Per-test temporary root; removed again in tearDown.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs
            Forwarded unchanged to `Butler.from_config` (e.g.
            ``writeable=True``).
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config()

        butler = Butler.from_config(config, **kwargs)
        # Replace the datastore with a mock so no real files are read/written.
        DatastoreMock.apply(butler)
        return butler

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test.  We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Use the default delete-on-close behavior (as in testDatasetTransfers)
        # so the export file does not leak outside the test's temp management;
        # the previous ``delete=False`` left the file behind permanently.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            butler2 = self.makeButler(writeable=True)
            # Import it once.
            butler2.import_(filename=file.name)
            # Import it again
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Attach the failing variant to the exception for easier
                # debugging (Python 3.11+ exception notes).
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "group": "three"},
            {"instrument": "Cam1", "group": "four"},
        )
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20211114},
        )
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "group": "three",
                "day_obs": 20211114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "group": "four",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here.  And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler.from_config(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        # Note: previously passed a spurious ``None`` as the assertIsNone
        # ``msg`` argument; there is only one value to check here.
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        # NOTE(review): butler is passed explicitly in addition to the
        # attribute access — confirm against the helper's signature.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit", "exposure", "day_obs", "group"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

    def test_dimension_records_import(self):
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset-v0.yaml"))

        # Count records and assume this means it worked.
        dimensions = (
            ("day_obs", 15),
            ("group", 1),
            ("exposure", 1),
            ("visit", 160),
            ("detector", 111),
            ("visit_system_membership", 160),
        )
        for dimension, count in dimensions:
            records = list(butler.registry.queryDimensionRecords(dimension, instrument="HSC"))
            self.assertEqual(len(records), count)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        # Each entry pairs a collection expression (string glob, regex,
        # ellipsis, or tuple of those) with the expected matches.
        expressions = (
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            (re.compile("u.*"), {"u/user/test"}),
            (re.compile(".*oll.*"), {"collection", "coll3"}),
            ("*oll*", {"collection", "coll3"}),
            ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
            ("*[0-9]", {"coll3"}),
        )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

    def test_skypix_templates(self):
        """Test that skypix templates can work."""
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        sc = StorageClass("null")
        dataset_type = DatasetType("warp", ("visit", "htm7"), sc, universe=butler.dimensions)
        dataId = butler.registry.expandDataId(
            DataCoordinate.standardize(
                dict(visit=27136, htm7=12345, instrument="HSC"), universe=butler.dimensions
            )
        )
        ref = DatasetRef(dataset_type, dataId, run="test")
        self.assertTrue(ref.dataId.hasRecords())

        # Both the generic "skypix" placeholder and the concrete "htm7"
        # dimension name should be usable in file templates.
        tmplstr = "{run}/{datasetType}/{visit.name}_{skypix}_{htm7}_{skypix.id}_{htm7.id}"
        file_template = FileTemplate(tmplstr)
        path = file_template.format(ref)
        self.assertEqual(path, "test/warp/HSCA02713600_12345_12345_12345_12345")
# Allow this test module to be run directly (``python test_simpleButler.py``)
# in addition to via pytest.
if __name__ == "__main__":
    unittest.main()