Coverage for tests/test_simpleButler.py: 14%
325 statements
« prev ^ index » next — coverage.py v7.5.0, created at 2024-05-02 03:16 -0700
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28from __future__ import annotations
30import json
31import os
32import re
33import tempfile
34import unittest
35from typing import Any
37try:
38 import numpy as np
39except ImportError:
40 np = None
42import astropy.time
43from lsst.daf.butler import (
44 Butler,
45 ButlerConfig,
46 CollectionType,
47 DataCoordinate,
48 DatasetId,
49 DatasetRef,
50 DatasetType,
51 StorageClass,
52 Timespan,
53)
54from lsst.daf.butler.datastore.file_templates import FileTemplate
55from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory
56from lsst.daf.butler.tests import DatastoreMock
57from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir
59try:
60 from lsst.daf.butler.tests.server import create_test_server
61except ImportError:
62 create_test_server = None
64TESTDIR = os.path.abspath(os.path.dirname(__file__))
class SimpleButlerTests(TestCaseMixin):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Export file (under data/registry/) used to seed datasets in most tests.
    datasetsImportFile = "datasets.yaml"

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    def makeButler(self, writeable: bool = False) -> Butler:
        """Return the Butler instance under test.

        Concrete subclasses must override this to construct a butler backed
        by their particular registry/datastore implementation.
        """
        raise NotImplementedError()

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self):
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
            # Import it all again.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testImportTwice(self):
        """Test exporting dimension records and datasets from a repo and then
        importing them all back in again twice.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # delete=False so the closed file can be re-read by the imports below
        # (re-opening a still-open NamedTemporaryFile fails on Windows).
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...))
        butler2 = self.makeButler(writeable=True)
        # Import it once.
        butler2.import_(filename=file.name)
        # Import it again; the second import must be a no-op, not an error.
        butler2.import_(filename=file.name)
        datasets1 = list(butler1.registry.queryDatasets(..., collections=...))
        datasets2 = list(butler2.registry.queryDatasets(..., collections=...))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1))
        self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2))
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in datasets1],
            [self.comparableRef(ref) for ref in datasets2],
        )

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Attach the failing variant to the exception so the test
                # failure identifies which combination broke.
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias1a], Timespan(t1, t2))
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "group": "three"},
            {"instrument": "Cam1", "group": "four"},
        )
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20211114},
        )
        # Choose timespans for exposures within the above calibration ranges
        # but make sure they are not identical to the full range.
        exp_time = astropy.time.TimeDelta(15.0, format="sec", scale="tai")
        span_delta = t2 - t1
        exp3_begin = t1 + (span_delta / 2.0)
        exp3_end = exp3_begin + exp_time
        span_delta = t3 - t2
        exp4_begin = t2 + (span_delta / 2.0)
        exp4_end = exp4_begin + exp_time
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(exp3_begin, exp3_end),
                "physical_filter": "Cam1-G",
                "group": "three",
                "day_obs": 20211114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(exp4_begin, exp4_end),
                "physical_filter": "Cam1-G",
                "group": "four",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Use explicit timespan and no exposure record.
        bias3b_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "detector": 3},
            collections="calibs",
            timespan=Timespan(exp4_begin, exp4_end),
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # No timespan at all.
        # Only one matching dataset in calibs collection so this works with
        # a defaulted timespan.
        bias1a_id, _ = butler.get("bias", {"instrument": "Cam1", "detector": 1}, collections="calibs")
        self.assertEqual(bias1a_id, bias1a.id)

        # Multiple datasets match in calibs collection with infinite timespan
        # so this fails.
        with self.assertRaises(LookupError):
            bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "detector": 3}, collections="calibs")

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

    def testRegistryDefaults(self):
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here. And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets. Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(
            butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'")
        )
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(
            butler.registry.queryDataIds(
                {"instrument", "detector", "physical_filter"},
                datasets={"flat"},
                detector=2,
                physical_filter="Cam1-G",
            )
        )
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler.from_config(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        # Fixed: the original passed a spurious second argument (`None`) that
        # was silently consumed as assertIsNone's ``msg`` parameter.
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"})

        # Check that repr() does not fail.
        defaults = RegistryDefaults(collections=["imported_g"], run="test")
        r = repr(defaults)
        self.assertIn("collections=('imported_g',)", r)
        self.assertIn("run='test'", r)

        defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix")
        r = repr(defaults)
        self.assertIn("skypix='pix'", r)
        self.assertIn("instrument='DummyCam'", r)

    def testJson(self):
        """Test JSON serialization mediated by registry."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G")

        # Transform the ref and dataset type to and from JSON
        # and check that it can be reconstructed properly

        # Do it with the ref and a component ref in minimal and standard form
        compRef = ref.makeComponentRef("wcs")

        for test_item in (ref, ref.datasetType, compRef, compRef.datasetType):
            for minimal in (False, True):
                json_str = test_item.to_json(minimal=minimal)
                from_json = type(test_item).from_json(json_str, registry=butler.registry)
                self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry")

                # for minimal=False case also do a test without registry
                if not minimal:
                    from_json = type(test_item).from_json(json_str, universe=butler.dimensions)
                    self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe")

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        # NOTE(review): the butler is passed explicitly in addition to being
        # the bound instance — this matches the private API's signature, which
        # takes the butler to query as an argument; confirm if it changes.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

    def testJsonDimensionRecordsAndHtmlRepresentation(self):
        """Test JSON round-tripping of dimension records and their HTML
        representation.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        for dimension in ("detector", "visit", "exposure", "day_obs", "group"):
            records = butler.registry.queryDimensionRecords(dimension, instrument="HSC")
            for r in records:
                for minimal in (True, False):
                    json_str = r.to_json(minimal=minimal)
                    r_json = type(r).from_json(json_str, registry=butler.registry)
                    self.assertEqual(r_json, r)
                    # check with direct method
                    simple = r.to_simple()
                    fromDirect = type(simple).direct(**json.loads(json_str))
                    self.assertEqual(simple, fromDirect)
                    # Also check equality of each of the components as dicts
                    self.assertEqual(r_json.toDict(), r.toDict())

                # check the html representation of records
                r_html = r._repr_html_()
                self.assertTrue(isinstance(r_html, str))
                self.assertIn(dimension, r_html)

    def test_dimension_records_import(self):
        """Test that dimension records written with an older dimension
        universe can still be imported.
        """
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset-v0.yaml"))

        # Count records and assume this means it worked.
        dimensions = (
            ("day_obs", 15),
            ("group", 1),
            ("exposure", 1),
            ("visit", 160),
            ("detector", 111),
            ("visit_system_membership", 160),
        )
        for dimension, count in dimensions:
            records = list(butler.registry.queryDimensionRecords(dimension, instrument="HSC"))
            self.assertEqual(len(records), count)

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        expressions = [
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            ("*oll*", {"collection", "coll3"}),
            ("*[0-9]", {"coll3"}),
        ]
        if self.supportsCollectionRegex:
            # Compiled regular expressions are only supported by some
            # registry implementations (see supportsCollectionRegex).
            expressions.extend(
                [
                    (re.compile("u.*"), {"u/user/test"}),
                    (re.compile(".*oll.*"), {"collection", "coll3"}),
                    ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
                ]
            )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

    def test_skypix_templates(self):
        """Test that skypix templates can work."""
        # Dimension Records
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        sc = StorageClass("null")
        dataset_type = DatasetType("warp", ("visit", "htm7"), sc, universe=butler.dimensions)
        dataId = butler.registry.expandDataId(
            DataCoordinate.standardize(
                dict(visit=27136, htm7=12345, instrument="HSC"), universe=butler.dimensions
            )
        )
        ref = DatasetRef(dataset_type, dataId, run="test")
        self.assertTrue(ref.dataId.hasRecords())

        # All four skypix template variants should expand to the htm7 ID.
        tmplstr = "{run}/{datasetType}/{visit.name|exposure.obs_id}_{skypix}_{htm7}_{skypix.id}_{htm7.id}"
        file_template = FileTemplate(tmplstr)
        path = file_template.format(ref)
        self.assertEqual(path, "test/warp/HSCA02713600_12345_12345_12345_12345")
class DirectSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase):
    """Run tests against DirectButler implementation."""

    # Fully-qualified manager classes the registry should be configured with.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )

    collectionsManager = "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, writeable: bool = False) -> Butler:
        """Construct a DirectButler backed by a fresh SQLite registry."""
        config = ButlerConfig()

        # Every butler instance gets its own SQLite database in a private
        # subdirectory so repeated calls within one test stay independent.
        registry_dir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{registry_dir}/gen3.sqlite3"
        config["registry", "managers", "datasets"] = self.datasetsManager
        config["registry", "managers", "collections"] = self.collectionsManager
        config["root"] = self.root

        # The registry schema must be created before the Butler can attach.
        registryConfig = RegistryConfig(config.get("registry"))
        _RegistryFactory(registryConfig).create_from_config()

        butler = Butler.from_config(config, writeable=writeable)
        DatastoreMock.apply(butler)
        return butler
class NameKeyCollectionManagerDirectSimpleButlerTestCase(DirectSimpleButlerTestCase, unittest.TestCase):
    """Run tests against DirectButler implementation using the
    NameKeyCollectionsManager.
    """

    # Override the collection manager; everything else is inherited from
    # DirectSimpleButlerTestCase.
    collectionsManager = "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"
@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
class RemoteSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase):
    """Run tests against Butler client/server."""

    # The server-side registry does not accept re.Pattern collection
    # expressions, so the regex cases in testWildcardQueries are skipped.
    supportsCollectionRegex = False

    def makeButler(self, writeable: bool = False) -> Butler:
        # enterContext ties the server fixture's lifetime to this test, so
        # it is torn down automatically even on failure.
        server_instance = self.enterContext(create_test_server(TESTDIR))
        butler = server_instance.hybrid_butler
        DatastoreMock.apply(butler)
        # Because RemoteButler doesn't have a Datastore object, we have to
        # duplicate some of the functionality from DatastoreMock separately.
        butler._remote_butler._get_dataset_as_python_object = _mock_get_dataset_as_python_object
        return butler
818def _mock_get_dataset_as_python_object(
819 ref: DatasetRef,
820 model: Any,
821 parameters: dict[str, Any] | None,
822) -> Any:
823 """Mimic the functionality of DatastoreMock's get() mock."""
824 return (ref.id, parameters)
# Allow the test module to be run directly as a script.
if __name__ == "__main__":
    unittest.main()