Coverage for tests/test_simpleButler.py: 14%

329 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-13 09:58 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30import json 

31import os 

32import re 

33import tempfile 

34import unittest 

35from typing import Any 

36 

37try: 

38 import numpy as np 

39except ImportError: 

40 np = None 

41 

42import astropy.time 

43from lsst.daf.butler import ( 

44 Butler, 

45 ButlerConfig, 

46 CollectionType, 

47 DataCoordinate, 

48 DatasetId, 

49 DatasetRef, 

50 DatasetType, 

51 StorageClass, 

52 Timespan, 

53) 

54from lsst.daf.butler.datastore.file_templates import FileTemplate 

55from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, _RegistryFactory 

56from lsst.daf.butler.tests import DatastoreMock 

57from lsst.daf.butler.tests.utils import TestCaseMixin, makeTestTempDir, removeTestTempDir 

58 

59try: 

60 from lsst.daf.butler.tests.server import create_test_server 

61except ImportError: 

62 create_test_server = None 

63 

# Absolute path to the directory holding this test file; used to locate the
# YAML export files under ``data/registry/``.
TESTDIR = os.path.abspath(os.path.dirname(__file__))

65 

66 

class SimpleButlerTests(TestCaseMixin):
    """Tests for butler (including import/export functionality) that should not
    depend on the Registry Database backend or Datastore implementation, and
    can instead utilize an in-memory SQLite Registry and a mocked Datastore.
    """

    # Name of the YAML file (under tests/data/registry/) whose datasets are
    # imported by most tests; subclasses may override.
    datasetsImportFile = "datasets.yaml"

    supportsCollectionRegex: bool = True
    """True if the registry class being tested supports regex searches for
    collections."""

    def makeButler(self, writeable: bool = False) -> Butler:
        # Subclass hook: construct the Butler implementation under test.
        raise NotImplementedError()

    def comparableRef(self, ref: DatasetRef) -> DatasetRef:
        """Return a DatasetRef that can be compared to a DatasetRef from
        other repository.

        For repositories that do not support round-trip of ID values this
        method returns unresolved DatasetRef, for round-trip-safe repos it
        returns unchanged ref.
        """
        return ref

91 

    def testReadBackwardsCompatibility(self):
        """Test that we can read an export file written by a previous version
        and commit to the daf_butler git repo.

        Notes
        -----
        At present this export file includes only dimension data, not datasets,
        which greatly limits the usefulness of this test. We should address
        this at some point, but I think it's best to wait for the changes to
        the export format required for CALIBRATION collections to land.
        """
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))
        # Spot-check a few things, but the most important test is just that
        # the above does not raise.
        # assertGreaterEqual on sets is a superset check: the import may
        # contain more records than the spot-check values listed here.
        self.assertGreaterEqual(
            {record.id for record in butler.registry.queryDimensionRecords("detector", instrument="HSC")},
            set(range(104)),  # should have all science CCDs; may have some focus ones.
        )
        self.assertGreaterEqual(
            {
                (record.id, record.physical_filter)
                for record in butler.registry.queryDimensionRecords("visit", instrument="HSC")
            },
            {
                (27136, "HSC-Z"),
                (11694, "HSC-G"),
                (23910, "HSC-R"),
                (11720, "HSC-Y"),
                (23900, "HSC-R"),
                (22646, "HSC-Y"),
                (1248, "HSC-I"),
                (19680, "HSC-I"),
                (1240, "HSC-I"),
                (424, "HSC-Y"),
                (19658, "HSC-I"),
                (344, "HSC-Y"),
                (1218, "HSC-R"),
                (1190, "HSC-Z"),
                (23718, "HSC-R"),
                (11700, "HSC-G"),
                (26036, "HSC-G"),
                (23872, "HSC-R"),
                (1170, "HSC-Z"),
                (1876, "HSC-Y"),
            },
        )

139 

140 def testDatasetTransfers(self): 

141 """Test exporting all datasets from a repo and then importing them all 

142 back in again. 

143 """ 

144 # Import data to play with. 

145 butler1 = self.makeButler(writeable=True) 

146 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

147 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

148 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file: 

149 # Export all datasets. 

150 with butler1.export(filename=file.name) as exporter: 

151 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

152 # Import it all again. 

153 butler2 = self.makeButler(writeable=True) 

154 butler2.import_(filename=file.name) 

155 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

156 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

157 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1)) 

158 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2)) 

159 self.assertCountEqual( 

160 [self.comparableRef(ref) for ref in datasets1], 

161 [self.comparableRef(ref) for ref in datasets2], 

162 ) 

163 

164 def testImportTwice(self): 

165 """Test exporting dimension records and datasets from a repo and then 

166 importing them all back in again twice. 

167 """ 

168 # Import data to play with. 

169 butler1 = self.makeButler(writeable=True) 

170 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

171 butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

172 with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as file: 

173 # Export all datasets. 

174 with butler1.export(filename=file.name) as exporter: 

175 exporter.saveDatasets(butler1.registry.queryDatasets(..., collections=...)) 

176 butler2 = self.makeButler(writeable=True) 

177 # Import it once. 

178 butler2.import_(filename=file.name) 

179 # Import it again 

180 butler2.import_(filename=file.name) 

181 datasets1 = list(butler1.registry.queryDatasets(..., collections=...)) 

182 datasets2 = list(butler2.registry.queryDatasets(..., collections=...)) 

183 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets1)) 

184 self.assertTrue(all(isinstance(ref.id, DatasetId) for ref in datasets2)) 

185 self.assertCountEqual( 

186 [self.comparableRef(ref) for ref in datasets1], 

187 [self.comparableRef(ref) for ref in datasets2], 

188 ) 

189 

    def testCollectionTransfers(self):
        """Test exporting and then importing collections of various types."""
        # Populate a registry with some datasets.
        butler1 = self.makeButler(writeable=True)
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler1.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        registry1 = butler1.registry
        # Add some more collections.
        registry1.registerRun("run1")
        registry1.registerCollection("tag1", CollectionType.TAGGED)
        registry1.registerCollection("calibration1", CollectionType.CALIBRATION)
        registry1.registerCollection("chain1", CollectionType.CHAINED)
        registry1.registerCollection("chain2", CollectionType.CHAINED)
        # chain1 includes chain2, so the chains are nested.
        registry1.setCollectionChain("chain1", ["tag1", "run1", "chain2"])
        registry1.setCollectionChain("chain2", ["calibration1", "run1"])
        # Associate some datasets into the TAGGED and CALIBRATION collections.
        flats1 = list(registry1.queryDatasets("flat", collections=...))
        registry1.associate("tag1", flats1)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry1.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry1.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry1.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Certify with a mix of closed, open-ended, and empty validity ranges
        # so all Timespan forms are exercised by the export/import round trip.
        registry1.certify("calibration1", [bias2a, bias3a], Timespan(t1, t2))
        registry1.certify("calibration1", [bias2b], Timespan(t2, None))
        registry1.certify("calibration1", [bias3b], Timespan(t2, t3))
        registry1.certify("calibration1", [bias1a], Timespan.makeEmpty())

        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") as file:
            # Export all collections, and some datasets.
            with butler1.export(filename=file.name) as exporter:
                # Sort results to put chain1 before chain2, which is
                # intentionally not topological order.
                for collection in sorted(registry1.queryCollections()):
                    exporter.saveCollection(collection)
                exporter.saveDatasets(flats1)
                exporter.saveDatasets([bias1a, bias2a, bias2b, bias3a, bias3b])
            # Import them into a new registry.
            butler2 = self.makeButler(writeable=True)
            butler2.import_(filename=file.name)
        registry2 = butler2.registry
        # Check that it all round-tripped, starting with the collections
        # themselves.
        self.assertIs(registry2.getCollectionType("run1"), CollectionType.RUN)
        self.assertIs(registry2.getCollectionType("tag1"), CollectionType.TAGGED)
        self.assertIs(registry2.getCollectionType("calibration1"), CollectionType.CALIBRATION)
        self.assertIs(registry2.getCollectionType("chain1"), CollectionType.CHAINED)
        self.assertIs(registry2.getCollectionType("chain2"), CollectionType.CHAINED)
        self.assertEqual(
            list(registry2.getCollectionChain("chain1")),
            ["tag1", "run1", "chain2"],
        )
        self.assertEqual(
            list(registry2.getCollectionChain("chain2")),
            ["calibration1", "run1"],
        )
        # Check that tag collection contents are the same.
        self.maxDiff = None
        self.assertCountEqual(
            [self.comparableRef(ref) for ref in registry1.queryDatasets(..., collections="tag1")],
            [self.comparableRef(ref) for ref in registry2.queryDatasets(..., collections="tag1")],
        )
        # Check that calibration collection contents are the same.
        self.assertCountEqual(
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry1.queryDatasetAssociations("bias", collections="calibration1")
            ],
            [
                (self.comparableRef(assoc.ref), assoc.timespan)
                for assoc in registry2.queryDatasetAssociations("bias", collections="calibration1")
            ],
        )

266 

    def testButlerGet(self):
        """Test that butler.get can work with different variants."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))

        # Find the DatasetRef for a flat
        coll = "imported_g"
        flat2g = butler.find_dataset(
            "flat", instrument="Cam1", full_name="Ab", physical_filter="Cam1-G", collections=coll
        )

        # Create a numpy integer to check that works fine
        # (falls back to a plain int when numpy is not installed).
        detector_np = np.int64(2) if np else 2

        # Try to get it using different variations of dataId + keyword
        # arguments
        # Note that instrument.class_name does not work
        # Each entry is a (dataId, kwargs) pair; all should resolve to the
        # same flat dataset found above.
        variants = (
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            (None, {"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}),
            ({"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": detector_np, "physical_filter": "Cam1-G"}, {}),
            ({"instrument": "Cam1", "detector": 2}, {"physical_filter": "Cam1-G"}),
            ({"detector.full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"full_name": "Ab"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"full_name": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"detector": "Ab", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b", "raft": "A"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            ({"name_in_raft": "b"}, {"raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (None, {"name_in_raft": "b", "raft": "A", "instrument": "Cam1", "physical_filter": "Cam1-G"}),
            (
                {"detector.name_in_raft": "b", "detector.raft": "A"},
                {"instrument": "Cam1", "physical_filter": "Cam1-G"},
            ),
            (
                {
                    "detector.name_in_raft": "b",
                    "detector.raft": "A",
                    "instrument": "Cam1",
                    "physical_filter": "Cam1-G",
                },
                {},
            ),
            # Duplicate (but valid) information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "A", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "A", "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "A"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
        )

        for dataId, kwds in variants:
            try:
                # NOTE(review): unpacking get() into (id, _) presumably relies
                # on the mocked datastore returning the dataset ID -- confirm
                # against DatastoreMock.
                flat_id, _ = butler.get("flat", dataId=dataId, collections=coll, **kwds)
            except Exception as e:
                # Attach the failing variant to the exception (Python 3.11+).
                e.add_note(f"dataId={dataId}, kwds={kwds}")
                raise
            self.assertEqual(flat_id, flat2g.id, msg=f"DataId: {dataId}, kwds: {kwds}")

        # Check that bad combinations raise.
        variants = (
            # Inconsistent detector information.
            (None, {"instrument": "Cam1", "detector": 2, "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 2}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"detector": 12}, {"instrument": "Cam1", "raft": "B", "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
            ({"raft": "B"}, {"instrument": "Cam1", "detector": "Ab", "physical_filter": "Cam1-G"}),
            # Under-specified.
            ({"raft": "B"}, {"instrument": "Cam1", "physical_filter": "Cam1-G"}),
            # Spurious kwargs.
            (None, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G", "x": "y"}),
            ({"x": "y"}, {"instrument": "Cam1", "detector": 2, "physical_filter": "Cam1-G"}),
        )
        for dataId, kwds in variants:
            with self.assertRaises((ValueError, LookupError)):
                butler.get("flat", dataId=dataId, collections=coll, **kwds)

344 

    def testGetCalibration(self):
        """Test that `Butler.get` can be used to fetch from
        `~CollectionType.CALIBRATION` collections if the data ID includes
        extra dimensions with temporal information.
        """
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile))
        # Certify some biases into a CALIBRATION collection.
        registry = butler.registry
        registry.registerCollection("calibs", CollectionType.CALIBRATION)
        t1 = astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")
        t2 = astropy.time.Time("2020-01-01T02:00:00", format="isot", scale="tai")
        t3 = astropy.time.Time("2020-01-01T03:00:00", format="isot", scale="tai")
        bias1a = registry.findDataset("bias", instrument="Cam1", detector=1, collections="imported_g")
        bias2a = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_g")
        bias3a = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_g")
        bias2b = registry.findDataset("bias", instrument="Cam1", detector=2, collections="imported_r")
        bias3b = registry.findDataset("bias", instrument="Cam1", detector=3, collections="imported_r")
        # Detector 1 gets a single certified bias; detectors 2 and 3 get two
        # epochs each so that timespan-based lookup is actually exercised.
        registry.certify("calibs", [bias1a], Timespan(t1, t2))
        registry.certify("calibs", [bias2a, bias3a], Timespan(t1, t2))
        registry.certify("calibs", [bias2b], Timespan(t2, None))
        registry.certify("calibs", [bias3b], Timespan(t2, t3))
        # Insert some exposure dimension data.
        registry.insertDimensionData(
            "group",
            {"instrument": "Cam1", "group": "three"},
            {"instrument": "Cam1", "group": "four"},
        )
        registry.insertDimensionData(
            "day_obs",
            {"instrument": "Cam1", "id": 20211114},
        )
        # Choose timespans for exposures within the above calibration ranges
        # but make sure they are not identical to the full range.
        exp_time = astropy.time.TimeDelta(15.0, format="sec", scale="tai")
        span_delta = t2 - t1
        exp3_begin = t1 + (span_delta / 2.0)
        exp3_end = exp3_begin + exp_time
        span_delta = t3 - t2
        exp4_begin = t2 + (span_delta / 2.0)
        exp4_end = exp4_begin + exp_time
        registry.insertDimensionData(
            "exposure",
            {
                "instrument": "Cam1",
                "id": 3,
                "obs_id": "three",
                "timespan": Timespan(exp3_begin, exp3_end),
                "physical_filter": "Cam1-G",
                "group": "three",
                "day_obs": 20211114,
                "seq_num": 55,
            },
            {
                "instrument": "Cam1",
                "id": 4,
                "obs_id": "four",
                "timespan": Timespan(exp4_begin, exp4_end),
                "physical_filter": "Cam1-G",
                "group": "four",
                "day_obs": 20211114,
                "seq_num": 42,
            },
        )
        # Get some biases from raw-like data IDs.
        bias2a_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 3, "detector": 2}, collections="calibs"
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias", {"instrument": "Cam1", "exposure": 4, "detector": 3}, collections="calibs"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Use explicit timespan and no exposure record.
        bias3b_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "detector": 3},
            collections="calibs",
            timespan=Timespan(exp4_begin, exp4_end),
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # No timespan at all.
        # Only one matching dataset in calibs collection so this works with
        # a defaulted timespan.
        bias1a_id, _ = butler.get("bias", {"instrument": "Cam1", "detector": 1}, collections="calibs")
        self.assertEqual(bias1a_id, bias1a.id)

        # Multiple datasets match in calibs collection with infinite timespan
        # so this fails.
        with self.assertRaises(LookupError):
            bias3b_id, _ = butler.get("bias", {"instrument": "Cam1", "detector": 3}, collections="calibs")

        # Get using the kwarg form
        bias3b_id, _ = butler.get("bias", instrument="Cam1", exposure=4, detector=3, collections="calibs")
        self.assertEqual(bias3b_id, bias3b.id)

        # Do it again but using the record information
        bias2a_id, _ = butler.get(
            "bias",
            {"instrument": "Cam1", "exposure.obs_id": "three", "detector.full_name": "Ab"},
            collections="calibs",
        )
        self.assertEqual(bias2a_id, bias2a.id)
        bias3b_id, _ = butler.get(
            "bias",
            {"exposure.obs_id": "four", "detector.full_name": "Ba"},
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary.
        bias3b_id, _ = butler.get(
            "bias", {"exposure": "four", "detector": "Ba"}, collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # And again but this time using the alternate value rather than
        # the primary and do it in the keyword arguments.
        bias3b_id, _ = butler.get(
            "bias", exposure="four", detector="Ba", collections="calibs", instrument="Cam1"
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Now with implied record columns
        bias3b_id, _ = butler.get(
            "bias",
            day_obs=20211114,
            seq_num=42,
            raft="B",
            name_in_raft="a",
            collections="calibs",
            instrument="Cam1",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Allow a fully-specified dataId and unnecessary extra information
        # that comes from the record.
        bias3b_id, _ = butler.get(
            "bias",
            dataId=dict(
                exposure=4,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                instrument="Cam1",
            ),
            collections="calibs",
        )
        self.assertEqual(bias3b_id, bias3b.id)

        # Extra but inconsistent record values are a problem.
        # (exposure=3 has seq_num 55, not 42, per the records inserted above.)
        with self.assertRaises(ValueError):
            bias3b_id, _ = butler.get(
                "bias",
                exposure=3,
                day_obs=20211114,
                seq_num=42,
                detector=3,
                collections="calibs",
                instrument="Cam1",
            )

        # Ensure that spurious kwargs cause an exception.
        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                {"exposure.obs_id": "four", "immediate": True, "detector.full_name": "Ba"},
                collections="calibs",
                instrument="Cam1",
            )

        with self.assertRaises(ValueError):
            butler.get(
                "bias",
                day_obs=20211114,
                seq_num=42,
                raft="B",
                name_in_raft="a",
                collections="calibs",
                instrument="Cam1",
                immediate=True,
            )

533 

534 def testRegistryDefaults(self): 

535 """Test that we can default the collections and some data ID keys when 

536 constructing a butler. 

537 

538 Many tests that use default run already exist in ``test_butler.py``, so 

539 that isn't tested here. And while most of this functionality is 

540 implemented in `Registry`, we test it here instead of 

541 ``daf/butler/tests/registry.py`` because it shouldn't depend on the 

542 database backend at all. 

543 """ 

544 butler = self.makeButler(writeable=True) 

545 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

546 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

547 # Need to actually set defaults later, not at construction, because 

548 # we need to import the instrument before we can use it as a default. 

549 # Don't set a default instrument value for data IDs, because 'Cam1' 

550 # should be inferred by virtue of that being the only value in the 

551 # input collections. 

552 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

553 # Use findDataset without collections or instrument. 

554 ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G") 

555 # Do the same with Butler.get; this should ultimately invoke a lot of 

556 # the same code, so it's a bit circular, but mostly we're checking that 

557 # it works at all. 

558 dataset_id, _ = butler.get("flat", detector=2, physical_filter="Cam1-G") 

559 self.assertEqual(ref.id, dataset_id) 

560 # Query for datasets. Test defaulting the data ID in both kwargs and 

561 # in the WHERE expression. 

562 queried_refs_1 = set(butler.registry.queryDatasets("flat", detector=2, physical_filter="Cam1-G")) 

563 self.assertEqual({ref}, queried_refs_1) 

564 queried_refs_2 = set( 

565 butler.registry.queryDatasets("flat", where="detector=2 AND physical_filter='Cam1-G'") 

566 ) 

567 self.assertEqual({ref}, queried_refs_2) 

568 # Query for data IDs with a dataset constraint. 

569 queried_data_ids = set( 

570 butler.registry.queryDataIds( 

571 {"instrument", "detector", "physical_filter"}, 

572 datasets={"flat"}, 

573 detector=2, 

574 physical_filter="Cam1-G", 

575 ) 

576 ) 

577 self.assertEqual({ref.dataId}, queried_data_ids) 

578 # Add another instrument to the repo, and a dataset that uses it to 

579 # the `imported_g` collection. 

580 butler.registry.insertDimensionData("instrument", {"name": "Cam2"}) 

581 camera = DatasetType( 

582 "camera", 

583 dimensions=butler.dimensions["instrument"].graph, 

584 storageClass="Camera", 

585 ) 

586 butler.registry.registerDatasetType(camera) 

587 butler.registry.insertDatasets(camera, [{"instrument": "Cam2"}], run="imported_g") 

588 # Initialize a new butler with `imported_g` as its default run. 

589 # This should not have a default instrument, because there are two. 

590 # Pass run instead of collections; this should set both. 

591 butler2 = Butler.from_config(butler=butler, run="imported_g") 

592 self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"]) 

593 self.assertEqual(butler2.registry.defaults.run, "imported_g") 

594 self.assertFalse(butler2.registry.defaults.dataId) 

595 # Initialize a new butler with an instrument default explicitly given. 

596 # Set collections instead of run, which should then be None. 

597 butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2") 

598 self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) 

599 self.assertIsNone(butler3.registry.defaults.run, None) 

600 self.assertEqual(butler3.registry.defaults.dataId.required, {"instrument": "Cam2"}) 

601 

602 # Check that repr() does not fail. 

603 defaults = RegistryDefaults(collections=["imported_g"], run="test") 

604 r = repr(defaults) 

605 self.assertIn("collections=('imported_g',)", r) 

606 self.assertIn("run='test'", r) 

607 

608 defaults = RegistryDefaults(run="test", instrument="DummyCam", skypix="pix") 

609 r = repr(defaults) 

610 self.assertIn("skypix='pix'", r) 

611 self.assertIn("instrument='DummyCam'", r) 

612 

613 def testJson(self): 

614 """Test JSON serialization mediated by registry.""" 

615 butler = self.makeButler(writeable=True) 

616 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) 

617 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", self.datasetsImportFile)) 

618 # Need to actually set defaults later, not at construction, because 

619 # we need to import the instrument before we can use it as a default. 

620 # Don't set a default instrument value for data IDs, because 'Cam1' 

621 # should be inferred by virtue of that being the only value in the 

622 # input collections. 

623 butler.registry.defaults = RegistryDefaults(collections=["imported_g"]) 

624 # Use findDataset without collections or instrument. 

625 ref = butler.find_dataset("flat", detector=2, physical_filter="Cam1-G") 

626 

627 # Transform the ref and dataset type to and from JSON 

628 # and check that it can be reconstructed properly 

629 

630 # Do it with the ref and a component ref in minimal and standard form 

631 compRef = ref.makeComponentRef("wcs") 

632 

633 for test_item in (ref, ref.datasetType, compRef, compRef.datasetType): 

634 for minimal in (False, True): 

635 json_str = test_item.to_json(minimal=minimal) 

636 from_json = type(test_item).from_json(json_str, registry=butler.registry) 

637 self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using registry") 

638 

639 # for minimal=False case also do a test without registry 

640 if not minimal: 

641 from_json = type(test_item).from_json(json_str, universe=butler.dimensions) 

642 self.assertEqual(from_json, test_item, msg=f"From JSON '{json_str}' using universe") 

643 

    def test_populated_by(self):
        """Test that dimension records can find other records."""
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml"))

        # Every dimension element that is stored in its own table.
        elements = frozenset(element for element in butler.dimensions.elements if element.has_own_table)

        # Get a visit-based dataId.
        data_ids = set(butler.registry.queryDataIds("visit", visit=1232, instrument="HSC"))

        # Request all the records related to it.
        # NOTE(review): ``butler`` is passed explicitly even though this is a
        # bound-method call on ``butler`` -- presumably the private helper's
        # first parameter expects a butler; confirm against its signature.
        records = butler._extract_all_dimension_records_from_data_ids(butler, data_ids, elements)

        # The visit data ID should pull in records for the elements populated
        # by visit.
        self.assertIn(butler.dimensions["visit_detector_region"], records, f"Keys: {records.keys()}")
        self.assertIn(butler.dimensions["visit_system_membership"], records)
        self.assertIn(butler.dimensions["visit_system"], records)

660 

661 def testJsonDimensionRecordsAndHtmlRepresentation(self): 

662 # Dimension Records 

663 butler = self.makeButler(writeable=True) 

664 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml")) 

665 

666 for dimension in ("detector", "visit", "exposure", "day_obs", "group"): 

667 records = butler.registry.queryDimensionRecords(dimension, instrument="HSC") 

668 for r in records: 

669 for minimal in (True, False): 

670 json_str = r.to_json(minimal=minimal) 

671 r_json = type(r).from_json(json_str, registry=butler.registry) 

672 self.assertEqual(r_json, r) 

673 # check with direct method 

674 simple = r.to_simple() 

675 fromDirect = type(simple).direct(**json.loads(json_str)) 

676 self.assertEqual(simple, fromDirect) 

677 # Also check equality of each of the components as dicts 

678 self.assertEqual(r_json.toDict(), r.toDict()) 

679 

680 # check the html representation of records 

681 r_html = r._repr_html_() 

682 self.assertTrue(isinstance(r_html, str)) 

683 self.assertIn(dimension, r_html) 

684 

685 def test_dimension_records_import(self): 

686 # Dimension Records 

687 butler = self.makeButler(writeable=True) 

688 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset-v0.yaml")) 

689 

690 # Count records and assume this means it worked. 

691 dimensions = ( 

692 ("day_obs", 15), 

693 ("group", 1), 

694 ("exposure", 1), 

695 ("visit", 160), 

696 ("detector", 111), 

697 ("visit_system_membership", 160), 

698 ) 

699 for dimension, count in dimensions: 

700 records = list(butler.registry.queryDimensionRecords(dimension, instrument="HSC")) 

701 self.assertEqual(len(records), count) 

702 

    def testWildcardQueries(self):
        """Test that different collection type queries work."""
        # Import data to play with.
        butler = self.makeButler(writeable=True)
        butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml"))

        # Create some collections
        created = {"collection", "u/user/test", "coll3"}
        for collection in created:
            butler.registry.registerCollection(collection, type=CollectionType.RUN)

        # With no expression, every registered collection is returned.
        collections = butler.registry.queryCollections()
        self.assertEqual(set(collections), created)

        # Each entry pairs a collection-search expression with the set of
        # collection names it should match.
        expressions = [
            ("collection", {"collection"}),
            (..., created),
            ("*", created),
            (("collection", "*"), created),
            ("u/*", {"u/user/test"}),
            ("*oll*", {"collection", "coll3"}),
            ("*[0-9]", {"coll3"}),
        ]
        if self.supportsCollectionRegex:
            # Compiled-regex expressions are only exercised when the registry
            # under test supports regex searches for collections.
            expressions.extend(
                [
                    (re.compile("u.*"), {"u/user/test"}),
                    (re.compile(".*oll.*"), {"collection", "coll3"}),
                    ((re.compile(r".*\d$"), "u/user/test"), {"coll3", "u/user/test"}),
                ]
            )
        for expression, expected in expressions:
            result = butler.registry.queryCollections(expression)
            self.assertEqual(set(result), expected)

737 

738 def test_skypix_templates(self): 

739 """Test that skypix templates can work.""" 

740 # Dimension Records 

741 butler = self.makeButler(writeable=True) 

742 butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "hsc-rc2-subset.yaml")) 

743 

744 sc = StorageClass("null") 

745 dataset_type = DatasetType("warp", ("visit", "htm7"), sc, universe=butler.dimensions) 

746 dataId = butler.registry.expandDataId( 

747 DataCoordinate.standardize( 

748 dict(visit=27136, htm7=12345, instrument="HSC"), universe=butler.dimensions 

749 ) 

750 ) 

751 ref = DatasetRef(dataset_type, dataId, run="test") 

752 self.assertTrue(ref.dataId.hasRecords()) 

753 

754 tmplstr = "{run}/{datasetType}/{visit.name}_{skypix}_{htm7}_{skypix.id}_{htm7.id}" 

755 file_template = FileTemplate(tmplstr) 

756 path = file_template.format(ref) 

757 self.assertEqual(path, "test/warp/HSCA02713600_12345_12345_12345_12345") 

758 

759 

class DirectSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase):
    """Run tests against DirectButler implementation."""

    # Fully-qualified names of the registry manager classes under test.
    datasetsManager = (
        "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID"
    )
    collectionsManager = "lsst.daf.butler.registry.collections.synthIntKey.SynthIntKeyCollectionManager"

    def setUp(self):
        self.root = makeTestTempDir(TESTDIR)

    def tearDown(self):
        removeTestTempDir(self.root)

    def makeButler(self, writeable: bool = False) -> Butler:
        """Construct a DirectButler backed by a private SQLite registry."""
        butler_config = ButlerConfig()

        # Give each butler instance its own temporary directory for the
        # registry database so instances do not interfere with one another.
        db_dir = tempfile.mkdtemp(dir=self.root)
        butler_config["registry", "db"] = f"sqlite:///{db_dir}/gen3.sqlite3"
        butler_config["registry", "managers", "datasets"] = self.datasetsManager
        butler_config["registry", "managers", "collections"] = self.collectionsManager
        butler_config["root"] = self.root

        # The registry schema has to exist before the butler is constructed.
        registry_config = RegistryConfig(butler_config.get("registry"))
        _RegistryFactory(registry_config).create_from_config()

        butler = Butler.from_config(butler_config, writeable=writeable)
        DatastoreMock.apply(butler)
        return butler

792 

793 

class NameKeyCollectionManagerDirectSimpleButlerTestCase(DirectSimpleButlerTestCase, unittest.TestCase):
    """Repeat the DirectButler tests with the name-keyed collection manager
    instead of the synthetic-integer-keyed one.
    """

    collectionsManager = "lsst.daf.butler.registry.collections.nameKey.NameKeyCollectionManager"

800 

801 

@unittest.skipIf(create_test_server is None, "Server dependencies not installed.")
class RemoteSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase):
    """Run tests against Butler client/server."""

    # The server-backed registry does not accept regex collection
    # expressions.
    supportsCollectionRegex = False

    def makeButler(self, writeable: bool = False) -> Butler:
        """Construct a hybrid client/server butler against a test server."""
        server = self.enterContext(create_test_server(TESTDIR))
        butler = server.hybrid_butler
        DatastoreMock.apply(butler)
        # RemoteButler has no Datastore object, so part of DatastoreMock's
        # get() behavior has to be patched in separately.
        butler._remote_butler._get_dataset_as_python_object = _mock_get_dataset_as_python_object
        return butler

    def testRegistryDefaults(self):
        # Registry defaults are not yet fully implemented in RemoteButler.
        pass

    def testJson(self):
        # Needs registry defaults functionality that's not yet implemented
        # in RemoteButler.
        pass

825 

826 

827def _mock_get_dataset_as_python_object( 

828 ref: DatasetRef, 

829 model: Any, 

830 parameters: dict[str, Any] | None, 

831) -> Any: 

832 """Mimic the functionality of DatastoreMock's get() mock.""" 

833 return (ref.id, parameters) 

834 

835 

# Allow running this test module directly with `python test_simpleButler.py`.
if __name__ == "__main__":
    unittest.main()