Coverage for tests/test_simpleButler.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
22from __future__ import annotations
24import os
25import tempfile
26from typing import Any
27import unittest
29try:
30 import numpy as np
31except ImportError:
32 np = None
34import astropy.time
36from lsst.daf.butler import (
37 Butler,
38 ButlerConfig,
39 CollectionType,
40 DatasetType,
41 Registry,
42 Timespan,
43)
44from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults
45from lsst.daf.butler.tests import DatastoreMock
46from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir
49TESTDIR = os.path.abspath(os.path.dirname(__file__))
class SimpleButlerTestCase(unittest.TestCase):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize a temporary file-backed SQLite Registry and a mocked
    Datastore.
    """

    def setUp(self) -> None:
        # Every test gets its own scratch directory under the tests directory.
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self) -> None:
        removeTestTempDir(self.root)

    def makeButler(self, **kwargs: Any) -> Butler:
        """Return new Butler instance on each call.

        Parameters
        ----------
        **kwargs : `Any`
            Additional keyword arguments forwarded to the `Butler`
            constructor.

        Returns
        -------
        butler : `Butler`
            A butler backed by a freshly created SQLite registry, with
            `DatastoreMock` applied to it.
        """
        config = ButlerConfig()

        # make separate temporary directory for registry of this instance
        tmpdir = tempfile.mkdtemp(dir=self.root)
        config["registry", "db"] = f"sqlite:///{tmpdir}/gen3.sqlite3"
        config["root"] = self.root

        # have to make a registry first
        registryConfig = RegistryConfig(config.get("registry"))
        Registry.createFromConfig(registryConfig)

        butler = Butler(config, **kwargs)
        DatastoreMock.apply(butler)
        return butler

    def testReadBackwardsCompatibility(self) -> None:
        """Test that we can read an export file written by a previous version
        and committed to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test.  We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

    def testDatasetTransfers(self) -> None:
        """Test exporting all datasets from a repo and then importing them all
        back in again.
        """
        # Import data to play with.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all datasets.
            with butler1.export(filename=file.name) as exporter:
                exporter.saveDatasets(
                    butler1.registry.queryDatasets(..., collections=...)
                )
            # Import it all again.  This has to happen while the temporary
            # file still exists, i.e. inside the ``with`` block.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        # Check that it all round-tripped.  Use unresolved() to make
        # comparison not care about dataset_id values, which may be
        # rewritten.
        self.assertCountEqual(
            [ref.unresolved() for ref in butler1.registry.queryDatasets(..., collections=...)],
            [ref.unresolved() for ref in butler2.registry.queryDatasets(..., collections=...)],
        )

    def testCollectionTransfers(self) -> None:
        """Test exporting and then importing collections of various types.
        """
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
            registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [ref.unresolved() for ref in registry1.queryDatasets(..., collections="tag1")],
            [ref.unresolved() for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")],
            [(assoc.ref.unresolved(), assoc.timespan)
             for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")],
        )

    def testButlerGet(self) -> None:
        """Test that butler.get can work with different variants."""

        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.registry.findDataset("flat", instrument="Cam1", detector=2, physical_filter="Cam1-G",
                                             collections=coll)

        # Create a numpy integer to check that works fine; fall back to a
        # plain int when numpy could not be imported.
        detector_np = np.int64(2) if np is not None else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"detector.name_in_raft": "b", "detector.raft": "A"},
             {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"detector.name_in_raft": "b", "detector.raft": "A",
              "instrument": "Cam1", "physical_filter": "Cam1-G"}, {}),
        )

        for dataId, kwds in variants:
            try:
                # The result is unpacked as a pair; only the first element
                # (compared to the dataset ID below) is of interest here.
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Re-raise the same exception type with the failing variant
                # appended so the offending combination is identifiable.
                raise type(e)(f"{e}: dataId={dataId}, kwds={kwds}") from e
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

    def testGetCalibration(self) -> None:
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(t1, t2),
                "physical_filter": "Cam1-G",
                "day_obs": 20201114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(t2, t3),
                "physical_filter": "Cam1-G",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 3, "detector": 2},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure": 4, "detector": 3},
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias",
                                  instrument="Cam1", exposure=4, detector=3,
                                  collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get("bias", {"instrument": "Cam1", "exposure.obs_id": "three",
                                           "detector.full_name": "Ab"},
                                  collections="calibs")
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get("bias", {"exposure.obs_id": "four",
                                           "detector.full_name": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get("bias", {"exposure": "four",
                                           "detector": "Ba"},
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get("bias",
                                  exposure="four", detector="Ba",
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get("bias", day_obs=20211114, seq_num=42,
                                  raft="B", name_in_raft="a",
                                  collections="calibs", instrument="Cam1")
        self.assertEqual(bias3b_id, bias3b.id)

    def testRegistryDefaults(self) -> None:
        """Test that we can default the collections and some data ID keys when
        constructing a butler.

        Many tests that use default run already exist in ``test_butler.py``, so
        that isn't tested here.  And while most of this functionality is
        implemented in `Registry`, we test it here instead of
        ``daf/butler/tests/registry.py`` because it shouldn't depend on the
        database backend at all.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml"))
        # Need to actually set defaults later, not at construction, because
        # we need to import the instrument before we can use it as a default.
        # Don't set a default instrument value for data IDs, because 'Cam1'
        # should be inferred by virtue of that being the only value in the
        # input collections.
        butler.registry.defaults = RegistryDefaults(collections=["imported_g"])
        # Use findDataset without collections or instrument.
        ref = butler.registry.findDataset("flat", detector=2, physical_filter="Cam1-G")
        # Do the same with Butler.get; this should ultimately invoke a lot of
        # the same code, so it's a bit circular, but mostly we're checking that
        # it works at all.
        dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G")
        self.assertEqual(ref.id, dataset_id)
        # Query for datasets.  Test defaulting the data ID in both kwargs and
        # in the WHERE expression.
        queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref}, queried_refs_1)
        queried_refs_2 = set(butler.registry.queryDatasets("flat",
                                                           where="detector=2 AND physical_filter='Cam1-G'"))
        self.assertEqual({ref}, queried_refs_2)
        # Query for data IDs with a dataset constraint.
        queried_data_ids = set(butler.registry.queryDataIds({"instrument", "detector", "physical_filter"},
                                                            datasets={"flat"},
                                                            detector=2, physical_filter="Cam1-G"))
        self.assertEqual({ref.dataId}, queried_data_ids)
        # Add another instrument to the repo, and a dataset that uses it to
        # the `imported_g` collection.
        butler.registry.insertDimensionData("instrument", {"name": "Cam2"})
        camera = DatasetType(
            "camera",
            dimensions=butler.registry.dimensions["instrument"].graph,
            storageClass="Camera",
        )
        butler.registry.registerDatasetType(camera)
        butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g")
        # Initialize a new butler with `imported_g` as its default run.
        # This should not have a default instrument, because there are two.
        # Pass run instead of collections; this should set both.
        butler2 = Butler(butler=butler, run="imported_g")
        self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"])
        self.assertEqual(butler2.registry.defaults.run, "imported_g")
        self.assertFalse(butler2.registry.defaults.dataId)
        # Initialize a new butler with an instrument default explicitly given.
        # Set collections instead of run, which should then be None.
        butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2")
        self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"])
        self.assertIsNone(butler3.registry.defaults.run)
        self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"})
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()