Coverage for tests / test_simpleButler.py: 12%
435 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:37 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30import json
31import os
32import re
33import tempfile
34import unittest
35import urllib.parse
36from typing import Any
38try:
39 import numpy as np
40except ImportError:
41 np = None
43import astropy.time
45from lsst.daf.butler import (
46 Butler,
47 ButlerConfig,
48 CollectionType,
49 DataCoordinate,
50 DatasetId,
51 DatasetRef,
52 DatasetType,
53 LabeledButlerFactory,
54 StorageClass,
55 Timespan,
56)
57from lsst.daf.butler.datastore.file_templates import FileTemplate
58from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
59from lsst.daf.butler.tests import DatastoreMock
60from lsst.daf.butler.tests.server_available import butler_server_import_error, butler_server_is_available
61from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, mock_env, removeTestTempDir
if butler_server_is_available:
    from lsst.daf.butler.tests.server import create_test_server
66TESTDIR = os.path.abspath(os.path.dirname(__file__))
69class SimpleButlerTests(TestCaseMixin):
70 """Tests for butler (including import/export functionality) that should not
71 depend on the Registry Database backend or Datastore implementation, and
72 can instead utilize an in-memory SQLite Registry and a mocked Datastore.
73 """
75 datasetsImportFile = "datasets.yaml"
77 supportsCollectionRegex: bool = True
78 """True if the registry class being tested supports regex searches for
79 collections."""
    def makeButler(self, writeable: bool = False) -> Butler:
        """Return the Butler instance to test against.

        Parameters
        ----------
        writeable : `bool`, optional
            If `True`, the returned butler must accept writes.

        Returns
        -------
        butler : `Butler`
            A butler instance; per the class docstring this is expected to
            use an in-memory SQLite registry and a mocked datastore.

        Notes
        -----
        Abstract; concrete test-case subclasses must override this.
        """
        raise NotImplementedError()
    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        another repository.

        For repositories that do not support round-trip of ID values this
        method returns an unresolved DatasetRef; for round-trip-safe repos it
        returns the ref unchanged.

        Parameters
        ----------
        ref : `DatasetRef`
            Reference to normalize for cross-repository comparison.

        Returns
        -------
        ref : `DatasetRef`
            The comparable form of ``ref`` (unchanged in this base
            implementation).
        """
        return ref
    def testReadBackwardsCompatibility(self) -> None:
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        # A sampling of (visit, physical_filter) pairs known to be in the
        # export file; superset comparison tolerates extra records.
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )
142 def testDatasetTransfers(self):
143 """Test exporting all datasets from a repo and then importing them all
144 back in again.
145 """
146 # Import data to play with.
147 butler1 = self.makeButler(writeable=True)
148 butler1.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
149 butler1.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")
150 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
151 # Export all datasets.
152 with butler1.export(filename=file.name) as exporter:
153 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
154 # Import it all again.
155 butler2 = self.makeButler(writeable=True)
156 butler2.import_(filename=file.name)
157 datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
158 datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
159 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
160 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
161 self.assertCountEqual(
162 [self.comparableRef(ref) for ref in datasets1],
163 [self.comparableRef(ref) for ref in datasets2],
164 )
166 def testImportTwice(self):
167 """Test exporting dimension records and datasets from a repo and then
168 importing them all back in again twice.
169 """
170 # Import data to play with.
171 butler1 = self.makeButler(writeable=True)
172 butler1.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
173 butler1.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")
174 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
175 # Export all datasets.
176 with butler1.export(filename=file.name) as exporter:
177 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
178 butler2 = self.makeButler(writeable=True)
179 # Import it once.
180 butler2.import_(filename=file.name)
181 # Import it again
182 butler2.import_(filename=file.name)
183 datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
184 datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
185 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
186 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
187 self.assertCountEqual(
188 [self.comparableRef(ref) for ref in datasets1],
189 [self.comparableRef(ref) for ref in datasets2],
190 )
    def testCollectionTransfers(self) -> None:
        """Test exporting and then importing collections of various types
        (RUN, TAGGED, CALIBRATION, CHAINED), including chain membership and
        calibration validity ranges.
        """
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
        butler1.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        # chain1 contains chain2, so chain ordering must survive export/import.
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Certify with a mix of bounded, open-ended, and empty timespans to
        # exercise all validity-range forms in the export format.
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )
    def testButlerGet(self) -> None:
        """Test that butler.get can work with different variants of
        specifying the same data ID: positional dataId mappings, keyword
        arguments, alternate-key lookups, and record-attribute lookups.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
        butler.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        # (falls back to a plain int when numpy is not installed).
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments.  Each (dataId, kwargs) pair identifies the same dataset.
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )
        # Count gets so the metrics check below can confirm each variant
        # actually went through Butler.get.
        butler._metrics.reset()
        n_got = 0
        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
                n_got += 1
            except Exception as e:
                # Attach the failing variant to the exception for easier
                # debugging, then re-raise.
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")
        self.assertEqual(butler._metrics.n_get, n_got)

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)
    def testGetCalibration(self) -> None:
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
        butler.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Detectors 2 and 3 get two validity ranges each; detector 1 only one,
        # which is relied on by the "no timespan at all" check below.
        registry.certify("calibs", [bias1a], Timespan(t1, t2))
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "group": "three"},
            {"instrument": "Cam1", "group": "four"},
        )
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20211114},
        )
        # Choose timespans for exposures within the above calibration ranges
        # but make sure they are not identical to the full range.
        exp_time = astropy.time.TimeDelta(15.0, format="sec", scale="tai")
        span_delta = t2 - t1
        exp3_begin = t1 + (span_delta / 2.0)
        exp3_end = exp3_begin + exp_time
        span_delta = t3 - t2
        exp4_begin = t2 + (span_delta / 2.0)
        exp4_end = exp4_begin + exp_time
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(exp3_begin, exp3_end),
                "physical_filter": "Cam1-G",
                "group": "three",
                "day_obs": 20211114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(exp4_begin, exp4_end),
                "physical_filter": "Cam1-G",
                "group": "four",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Use explicit timespan and no exposure record.
        bias3b_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "detector": 3},
            collections="calibs",
            timespan=Timespan(exp4_begin, exp4_end),
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # No timespan at all.
        # Only one matching dataset in calibs collection so this works with
        # a defaulted timespan.
        bias1a_id, _ = butler.get("bias", {"instrument": "Cam1", "detector": 1}, collections="calibs")
        self.assertEqual(bias1a_id, bias1a.id)

        # Multiple datasets match in calibs collection with infinite timespan
        # so this fails.
        with self.assertRaises(LookupError):
            bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "detector": 3}, collections="calibs")

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Query for a calibration in a RUN and CALIBRATION collection to
        # ensure we do not get duplicate results.
        results = butler.query_datasets("bias", collections=["calibs", "imported_g"], find_first=False)
        self.assertEqual(len(set(results)), len(results))

        # Extra but inconsistent record values are a problem
        # (exposure 3 has seq_num 55, not 42).
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )
    def testRegistryDefaults(self) -> None:
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here. And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
        butler.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].minimal_group,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler.from_config(butler=butler, run="imported_g")
        self.enterContext(butler2)
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.enterContext(butler3)
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        self.assertIsNone(butler3.registry.defaults.run, None)
        self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        # repr() must also include arbitrary data ID keyword defaults.
        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)
625 def testJson(self):
626 """Test JSON serialization mediated by registry."""
627 butler = self.makeButler(writeable=True)
628 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
629 butler.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")
630 # Need to actually set defaults later, not at construction, because
631 # we need to import the instrument before we can use it as a default.
632 # Don't set a default instrument value for data IDs, because 'Cam1'
633 # should be inferred by virtue of that being the only value in the
634 # input collections.
635 butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
636 # Use findDataset without collections or instrument.
637 ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
639 # Transform the ref and dataset type to and from JSON
640 # and check that it can be reconstructed properly
642 # Do it with the ref and a component ref in minimal and standard form
643 compRef = ref.makeComponentRef("wcs")
645 for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
646 for minimal in (False, True):
647 json_str = test_item.to_json(minimal=minimal)
648 from_json = type(test_item).from_json(json_str, registry=butler.registry)
649 self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")
651 # for minimal=False case also do a test without registry
652 if not minimal:
653 from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
654 self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")
656 def test_populated_by(self):
657 """Test that dimension records can find other records."""
658 butler = self.makeButler(writeable=True)
659 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
661 elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)
663 # Get a visit-based dataId.
664 data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))
666 # Request all the records related to it.
667 records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)
669 self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
670 self.assertIn(butler.dimensions["visit_system_membership"], records)
671 self.assertIn(butler.dimensions["visit_system"], records)
673 def testJsonDimensionRecordsAndHtmlRepresentation(self):
674 # Dimension Records
675 butler = self.makeButler(writeable=True)
676 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
678 for dimension in ("detector", "visit", "exposure", "day_obs", "group"):
679 records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
680 for r in records:
681 for minimal in (True, False):
682 json_str = r.to_json(minimal=minimal)
683 r_json = type(r).from_json(json_str, registry=butler.registry)
684 self.assertEqual(r_json, r)
685 # check with direct method
686 simple = r.to_simple()
687 fromDirect = type(simple).direct(**json.loads(json_str))
688 self.assertEqual(simple, fromDirect)
689 # Also check equality of each of the components as dicts
690 self.assertEqual(r_json.toDict(), r.toDict())
692 # check the html representation of records
693 r_html = r._repr_html_()
694 self.assertTrue(isinstance(r_html, str))
695 self.assertIn(dimension, r_html)
697 def test_dimension_records_import(self):
698 # Dimension Records
699 butler = self.makeButler(writeable=True)
700 with self.assertWarns(UserWarning) as cm:
701 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/hsc-rc2-subset-v0.yaml")
702 self.assertIn("Constructing day_obs records with no timespans", str(cm.warning))
704 # Count records and assume this means it worked.
705 dimensions = (
706 ("day_obs", 3),
707 ("group", 1),
708 ("exposure", 1),
709 ("visit", 2),
710 ("detector", 3),
711 ("visit_system_membership", 2),
712 )
713 for dimension, count in dimensions:
714 records = list(butler.registry.queryDimensionRecords(dimension, instrument="HSC"))
715 self.assertEqual(len(records), count, dimension)
717 def testWildcardQueries(self):
718 """Test that different collection type queries work."""
719 # Import data to play with.
720 butler = self.makeButler(writeable=True)
721 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
723 # Create some collections
724 created = {"collection", "u/user/test", "coll3"}
725 for collection in created:
726 butler.registry.registerCollection(collection, type=CollectionType.RUN)
728 collections = butler.registry.queryCollections()
729 self.assertEqual(set(collections), created)
731 expressions = [
732 ("collection", {"collection"}),
733 (..., created),
734 ("*", created),
735 (("collection", "*"), created),
736 ("u/*", {"u/user/test"}),
737 ("*oll*", {"collection", "coll3"}),
738 ("*[0-9]", {"coll3"}),
739 ]
740 for expression, expected in expressions:
741 result = butler.registry.queryCollections(expression)
742 self.assertEqual(set(result), expected)
744 if self.supportsCollectionRegex:
745 expressions = [
746 (re.compile("u.*"), {"u/user/test"}),
747 (re.compile(".*oll.*"), {"collection", "coll3"}),
748 ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
749 ]
750 for expression, expected in expressions:
751 with self.assertWarns(FutureWarning):
752 result = butler.registry.queryCollections(expression)
753 self.assertEqual(set(result), expected)
755 def test_skypix_templates(self):
756 """Test that skypix templates can work."""
757 # Dimension Records
758 butler = self.makeButler(writeable=True)
759 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
761 sc = StorageClass("null")
762 dataset_type = DatasetType("warp", ("visit", "htm7"), sc, universe=butler.dimensions)
763 dataId = butler.registry.expandDataId(
764 DataCoordinate.standardize(
765 dict(visit=27136, htm7=12345, instrument="HSC"), universe=butler.dimensions
766 )
767 )
768 ref = DatasetRef(dataset_type, dataId, run="test")
769 self.assertTrue(ref.dataId.hasRecords())
771 tmplstr = (
772 "{run}/{datasetType}/{visit.name|exposure.obs_id|xyz}_"
773 "{instrument}_{skypix}_{htm7}_{skypix.id}_{htm7.id}"
774 )
775 file_template = FileTemplate(tmplstr)
776 file_template.validateTemplate(ref)
777 path = file_template.format(ref)
778 self.assertEqual(path, "test/warp/HSCA02713600_HSC_12345_12345_12345_12345")
780 def test_expand_changed_id(self):
781 """Test that expandDataId doesn't reuse records invalidated by kwarg
782 changes.
783 """
784 butler = self.makeButler(writeable=True)
785 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
786 data_id_1 = butler.registry.expandDataId(instrument="Cam1", detector=1)
787 data_id_2 = butler.registry.expandDataId(data_id_1, detector=2)
788 self.assertEqual(data_id_2.records["detector"].id, 2)
790 def test_clone(self):
791 # This just tests that the default-overriding logic works as expected.
792 # The actual internals are tested in test_butler.py, in
793 # ClonedSqliteButlerTestCase and
794 # ClonedPostgresPosixDatastoreButlerTestCase.
796 butler = self.makeButler(writeable=True)
797 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
798 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/datasets.yaml")
799 butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/spatial.yaml")
801 # Original butler was created with the default arguments:
802 # collections = None
803 # run = None
804 # inferDefaults = True
805 # no explicit default data ID
807 # Collections can be overridden, and default data ID will be inferred
808 # from it.
809 clone1 = butler.clone(collections="imported_g")
810 self.assertEqual(clone1.registry.defaults.dataId, {"instrument": "Cam1"})
811 self.assertCountEqual(clone1.registry.defaults.collections, ["imported_g"])
812 self.assertIsNone(clone1.run)
814 # Disabling inferDefaults stops default data ID from being inferred
815 # from collections.
816 clone2 = clone1.clone(inferDefaults=False)
817 self.assertEqual(clone2.registry.defaults.dataId, {})
818 self.assertCountEqual(clone2.registry.defaults.collections, ["imported_g"])
819 self.assertIsNone(clone2.run)
821 # Setting a new run doesn't override explicitly-set collections.
822 clone3 = clone2.clone(run="imported_r")
823 self.assertEqual(clone3.registry.defaults.dataId, {})
824 self.assertCountEqual(clone3.registry.defaults.collections, ["imported_g"])
825 self.assertEqual(clone3.run, "imported_r")
827 # Following the behavior of the Butler() constructor, run will populate
828 # collections if collections was None. Default data ID is inferred
829 # from the run collection.
830 clone4 = butler.clone(run="imported_r")
831 self.assertEqual(clone4.registry.defaults.dataId, {"instrument": "Cam1"})
832 self.assertCountEqual(clone4.registry.defaults.collections, ["imported_r"])
833 self.assertEqual(clone4.run, "imported_r")
835 # Explicitly set data ID is combined with inferred defaults from
836 # collections.
837 clone5 = clone4.clone(dataId={"skymap": "SkyMap1"})
838 self.assertEqual(clone5.registry.defaults.dataId, {"instrument": "Cam1", "skymap": "SkyMap1"})
839 self.assertCountEqual(clone5.registry.defaults.collections, ["imported_r"])
840 self.assertEqual(clone5.run, "imported_r")
842 # Disabling inferred defaults preserves explicitly set data ID
843 clone6 = clone5.clone(inferDefaults=False)
844 self.assertEqual(clone6.registry.defaults.dataId, {"skymap": "SkyMap1"})
845 self.assertCountEqual(clone5.registry.defaults.collections, ["imported_r"])
846 self.assertEqual(clone5.run, "imported_r")
848 def test_calibration_dataset_type_registration(self) -> None:
849 # Register two dataset types that should share the same tags table,
850 # but only one is a calibration and hence needs a calibs table.
851 butler1 = self.makeButler(writeable=True)
852 a = DatasetType("a", ["instrument"], universe=butler1.dimensions, storageClass="StructuredDataDict")
853 b = DatasetType(
854 "b",
855 ["instrument"],
856 universe=butler1.dimensions,
857 storageClass="StructuredDataDict",
858 isCalibration=True,
859 )
860 butler1.registry.registerDatasetType(a)
861 butler1.registry.registerDatasetType(b)
862 self.assertEqual(butler1.get_dataset_type("a"), a)
863 self.assertEqual(butler1.get_dataset_type("b"), b)
864 butler1.registry.refresh()
865 self.assertEqual(butler1.get_dataset_type("a"), a)
866 self.assertEqual(butler1.get_dataset_type("b"), b)
867 # Register them in the opposite order in a new repo.
868 butler2 = self.makeButler(writeable=True)
869 # Dataset types have to use correct universe and with RemoteButler
870 # each butler instance has its own universe instance.
871 a = DatasetType("a", ["instrument"], universe=butler2.dimensions, storageClass="StructuredDataDict")
872 b = DatasetType(
873 "b",
874 ["instrument"],
875 universe=butler2.dimensions,
876 storageClass="StructuredDataDict",
877 isCalibration=True,
878 )
879 butler2.registry.registerDatasetType(b)
880 butler2.registry.registerDatasetType(a)
881 self.assertEqual(butler2.get_dataset_type("a"), a)
882 self.assertEqual(butler2.get_dataset_type("b"), b)
883 butler2.registry.refresh()
884 self.assertEqual(butler2.get_dataset_type("a"), a)
885 self.assertEqual(butler2.get_dataset_type("b"), b)
class DirectSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase):
    """Run tests against DirectButler implementation."""

    # Registry manager classes used by makeButler(); subclasses override
    # these to exercise alternative manager implementations.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )

    collectionsManager = "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"

    def setUp(self):
        # Fresh temporary root per test; removed again in tearDown().
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, writeable: bool = False) -> Butler:
        """Create a DirectButler backed by a private SQLite registry.

        Parameters
        ----------
        writeable : `bool`, optional
            If `True` the returned butler may modify the repository.

        Returns
        -------
        butler : `Butler`
            Butler whose datastore has been replaced by `DatastoreMock`.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["registry", "managers", "collections"] = self.collectionsManager
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        registry = _RegistryFactory(registryConfig).create_from_config()
        registry.close()

        # Write the YAML file so that some tests can recreate butler from it.
        config.dumpToUri(os.path.join(self.root, "butler.yaml"))
        butler = Butler.from_config(config, writeable=writeable)
        self.enterContext(butler)
        DatastoreMock.apply(butler)
        return butler

    def test_dataset_uris(self):
        """Test that dataset URIs can be parsed and retrieved."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename="resource://lsst.daf.butler/tests/registry_data/base.yaml")
        butler.import_(filename=f"resource://lsst.daf.butler/tests/registry_data/{self.datasetsImportFile}")

        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        self.assertIsInstance(ref, DatasetRef)

        # Get the butler root for the URI. It does have to be encoded
        # in case there are special characters in the path.
        config_dir = urllib.parse.quote(butler._config["root"])

        # Read it via a repo label and a path.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as index_file:
            label = "test_repo"
            index_file.write(f"{label}: {config_dir}\n")
            index_file.flush()
            with mock_env({"DAF_BUTLER_REPOSITORY_INDEX": index_file.name}):
                butler_factory = LabeledButlerFactory()
                self.addCleanup(butler_factory.close)
                factory = butler_factory.bind(access_token=None)

                # Every URI form below must resolve to the same dataset:
                # ivo:// with an explicit repo path, ivo:// with a path to
                # butler.yaml, butler:// with a label, and ivo:// with a
                # repo label.
                for dataset_uri in (
                    f"ivo://org.rubinobs/usdac/test?repo={config_dir}&id={ref.id}",
                    f"ivo://org.rubinobs/ukdac/lsst-dr1?repo={config_dir}%2Fbutler.yaml&id={ref.id}",
                    f"butler://{label}/{ref.id}",
                    f"ivo://org.rubinobs/usdac/lsst-dp1?repo={label}&id={ref.id}",
                ):
                    result = Butler.get_dataset_from_uri(dataset_uri)
                    self.enterContext(result.butler)
                    self.assertEqual(result.dataset, ref)
                    # The returned butler needs to have the datastore mocked.
                    DatastoreMock.apply(result.butler)
                    dataset_id, _ = result.butler.get(result.dataset)
                    self.assertEqual(dataset_id, ref.id)

                    # Resolving through an explicitly-bound factory must give
                    # the same dataset.
                    factory_result = Butler.get_dataset_from_uri(dataset_uri, factory=factory)
                    self.enterContext(factory_result.butler)
                    self.assertEqual(factory_result.dataset, ref)
                    # The returned butler needs to have the datastore mocked.
                    DatastoreMock.apply(factory_result.butler)
                    dataset_id, _ = factory_result.butler.get(factory_result.dataset)
                    self.assertEqual(dataset_id, ref.id)

                # Non existent dataset.
                missing_id = str(ref.id).replace("2", "3")
                result = Butler.get_dataset_from_uri(f"butler://{label}/{missing_id}")
                self.enterContext(result.butler)
                self.assertIsNone(result.dataset)

        # Test some failure modes.
        for dataset_uri in (
            "butler://label/1234",  # Bad UUID.
            "butler://1234",  # No UUID.
            "butler:///1234",  # No label.
            "ivo://rubin/1234",  # No query part and bad UUID and no label.
            "ivo://rubin/datasets/dr1/82d79caa-0823-4300-9874-67b737367ee0",  # No query part.
            "ivo://org.rubinobs/datasets?repo=dr1&id=1234",  # Bad UUID.
            "ivo://org.rubinobs/butler?release=dr1&id=82d79caa-0823-4300-9874-67b737367ee0",  # No repo key.
            "ivo://org.rubinobs/butler?repo=dr1&repo=dr2&id=82d79caa-0823-4300-9874-67b737367ee0",  # 2 vals.
            "ivo://org.rubinobs/something?repo=%20&id=82d79caa-0823-4300-9874-67b737367ee0",  # no repo.
            "https://something.edu/1234",  # Wrong scheme.
        ):
            with self.assertRaises(ValueError):
                Butler.parse_dataset_uri(dataset_uri)
class NameKeyCollectionManagerDirectSimpleButlerTestCase(DirectSimpleButlerTestCase, unittest.TestCase):
    """Run tests against DirectButler implementation using the
    NameKeyCollectionsManager.
    """

    # Only the collection manager differs; everything else is inherited
    # from DirectSimpleButlerTestCase.
    collectionsManager = "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
@unittest.skipIf(not butler_server_is_available, butler_server_import_error)
class RemoteSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase):
    """Run tests against Butler client/server."""

    # Disables the compiled-regex collection-expression cases in the shared
    # testWildcardQueries test.
    supportsCollectionRegex = False

    def makeButler(self, writeable: bool = False) -> Butler:
        """Return the hybrid butler of a freshly created test server.

        The server is registered with ``enterContext`` so it is shut down
        automatically when the test finishes.  The ``writeable`` flag is
        accepted for interface compatibility but not used here.
        """
        server_instance = self.enterContext(create_test_server(TESTDIR))
        butler = server_instance.hybrid_butler
        DatastoreMock.apply(butler)
        # Because RemoteButler doesn't have a Datastore object, we have to
        # duplicate some of the functionality from DatastoreMock separately.
        butler._remote_butler._get_dataset_as_python_object = _mock_get_dataset_as_python_object
        return butler
1018def _mock_get_dataset_as_python_object(
1019 ref: DatasetRef,
1020 model: Any,
1021 parameters: dict[str, Any] | None,
1022) -> Any:
1023 """Mimic the functionality of DatastoreMock's get() mock."""
1024 return (ref.id, parameters)
if __name__ == "__main__":
    # Allow running this test module directly (python test_simpleButler.py).
    unittest.main()