Coverage for tests/test_datastore.py: 11% of 1060 statements (coverage.py v7.2.7, created at 2023-08-12 09:20 +0000)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NullDatastore,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make example dataset that can be stored in butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )


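# A minimal illustrative sketch (added here for exposition; it is not part of
# the original test suite and is never called by it) of the put/get round trip
# that the tests below exercise. It assumes a fully configured `Datastore` and
# a matching `DatasetRef`; the helper name itself is hypothetical.
def _sketch_put_get_roundtrip(datastore: Datastore, ref: DatasetRef) -> None:
    metrics = makeExampleMetrics()
    datastore.put(metrics, ref)  # Serialize via the configured formatter.
    assert datastore.exists(ref)  # The datastore now has a record of it.
    assert datastore.get(ref) == metrics  # Reading it back is lossless.
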

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent the misdiagnosis that
    might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing"""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated"""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that registry knows nothing about."""
        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete registry entry so now we are trusting
            datastore.removeStoredItemInfo(ref)

            # Now stop trusting and check that things break
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of tests because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read
                # only component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, ...]]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

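    # A hedged sketch (added for illustration, never invoked by the suite) of
    # the rollback contract the two transaction tests above rely on: a put
    # made inside a failed `Datastore.transaction()` block must be undone.
    def _sketch_transaction_rollback(self, datastore: Datastore, ref: DatasetRef) -> None:
        try:
            with datastore.transaction():
                datastore.put(makeExampleMetrics(), ref)
                raise TransactionTestError("force a rollback")
        except TransactionTestError:
            pass
        assert not datastore.exists(ref)  # The put was rolled back.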

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False but then the new symlink will fail with
        # FileExistsError later in the code so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of datastore root unless
                    auto.
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for symlink to a symlink ingest"""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Cleanup the file for next time round loop
                    # since it will get the same file name in store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # ChainedDatastore that only includes InMemoryDatastores have to be
        # skipped as well.
        for name in datastore.names:
            if not name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")
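        # Note (added explanation): Python's ``for ... else`` runs the
        # ``else`` branch only when the loop finishes without ``break``,
        # i.e. only when every child datastore is an in-memory one.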

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

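    # A hedged sketch (illustrative only, not executed) of the record
    # export/import round trip verified by the tests below: record bundles
    # from `export_records` can be fed to `import_records` on a second
    # datastore sharing the same configuration.
    def _sketch_record_roundtrip(self, src: Datastore, dst: Datastore, refs: list[DatasetRef]) -> None:
        records = src.export_records(refs)  # Mapping of datastore name to records.
        dst.import_records(records)  # Re-register the same files in `dst`.
        for ref in refs:
            assert dst.exists(ref)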

    def testExportImportRecords(self) -> None:
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
982 f"stora_as_{x}", 

                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary and then rename"""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self):
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove it and put it back, this time with checksums enabled explicitly
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can trash


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write does clean up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails and leaves
        # a file behind
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
1206 """PosixDatastore specialization""" 


    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore.
1309 """ 

1310 

1311 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1312 

1313 

1314class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1315 """ChainedDatastore specialization using a POSIXDatastore.""" 

1316 

1317 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1318 

1319 

1320class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1321 """ChainedDatastore specialization using all InMemoryDatastore.""" 

1322 

1323 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1324 canIngest = False 

1325 

1326 

1327class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1328 """Test that a chained datastore can control constraints per-datastore 

1329 even if child datastore would accept. 

1330 """ 

1331 

1332 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1333 

1334 def setUp(self) -> None: 

1335 # Override the working directory before calling the base class 

1336 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1337 super().setUp() 

1338 

1339 def testConstraints(self) -> None: 

1340 """Test chained datastore constraints model.""" 

1341 metrics = makeExampleMetrics() 

1342 datastore = self.makeDatastore() 

1343 

1344 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1345 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1346 dimensions = self.universe.extract(("visit", "physical_filter", "instrument")) 

1347 dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} 

1348 dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"} 

1349 

1350 # Write empty file suitable for ingest check (JSON and YAML variants) 

1351 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1352 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1353 

1354 for typeName, dataId, sc, accept, ingest in ( 

1355 ("metric", dataId1, sc1, (False, True, False), True), 

1356 ("metric5", dataId1, sc1, (False, False, False), False), 

1357 ("metric5", dataId2, sc1, (True, False, False), False), 

1358 ("metric33", dataId2, sc2, (True, True, False), True), 

1359 ("metric5", dataId1, sc2, (False, True, False), True), 

1360 ): 

1361 # Choose different temp file depending on StorageClass 

1362 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1363 

1364 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1365 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId) 

1366 if any(accept): 

1367 datastore.put(metrics, ref) 

1368 self.assertTrue(datastore.exists(ref)) 

1369 

1370 # Check each datastore inside the chained datastore 

1371 for childDatastore, expected in zip(datastore.datastores, accept, strict=True): 

1372 self.assertEqual( 

1373 childDatastore.exists(ref), 

1374 expected, 

1375 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1376 ) 

1377 

1378 datastore.remove(ref) 

1379 

1380 # Check that ingest works 

1381 if ingest: 

1382 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1383 self.assertTrue(datastore.exists(ref)) 

1384 

1385 # Check each datastore inside the chained datastore 

1386 for childDatastore, expected in zip(datastore.datastores, accept, strict=True): 

                            # Ephemeral datastores currently means InMemory,
                            # and those do not accept ingest of files.
1389 if childDatastore.isEphemeral: 

1390 expected = False 

1391 self.assertEqual( 

1392 childDatastore.exists(ref), 

1393 expected, 

1394 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1395 ) 

1396 

1397 datastore.remove(ref) 

1398 else: 

1399 with self.assertRaises(DatasetTypeNotSupportedError): 

1400 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1401 

1402 else: 

1403 with self.assertRaises(DatasetTypeNotSupportedError): 

1404 datastore.put(metrics, ref) 

1405 self.assertFalse(datastore.exists(ref)) 

1406 

1407 # Again with ingest 

1408 with self.assertRaises(DatasetTypeNotSupportedError): 

1409 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1410 self.assertFalse(datastore.exists(ref)) 

1411 

1412 

1413class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1414 """Tests for datastore caching infrastructure.""" 

1415 

1416 @classmethod 

1417 def setUpClass(cls) -> None: 

1418 cls.storageClassFactory = StorageClassFactory() 

1419 cls.universe = DimensionUniverse() 

1420 

1421 # Ensure that we load the test storage class definitions. 

1422 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1423 cls.storageClassFactory.addFromConfig(scConfigFile) 

1424 

1425 def setUp(self) -> None: 

1426 self.id = 0 

1427 

1428 # Create a root that we can use for caching tests. 

1429 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1430 

1431 # Create some test dataset refs and associated test files 

1432 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1433 dimensions = self.universe.extract(("visit", "physical_filter")) 

1434 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1435 

1436 # Create list of refs and list of temporary files 

1437 n_datasets = 10 

1438 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1439 

1440 root_uri = ResourcePath(self.root, forceDirectory=True) 

1441 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1442 

1443 # Create test files. 

1444 for uri in self.files: 

1445 uri.write(b"0123456789") 

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)
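
    # The YAML fragments in the tests below follow the cache-manager
    # configuration layout exercised here: a top-level "cached" section with
    # an optional "root" directory, a "default" caching policy,
    # per-dataset-type "cacheable" overrides, and (for the expiry tests) an
    # "expiry" block with "mode" and "threshold" keys.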

    def testNoCacheDir(self) -> None:
        config_str = """
cached:
  root: null
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we don't have a cache directory
        self.assertIsNone(cache_manager._cache_directory)

        self.assertCache(cache_manager)

        # Test that the cache directory is marked temporary
        self.assertTrue(cache_manager.cache_directory.isTemporary)

    def testNoCacheDirReversed(self) -> None:
        """Use a default caching status of true and set metric1 to false."""
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        self.assertCache(cache_manager)

    def testEnvvarCacheDir(self) -> None:
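        """Test environment-variable control of the cache directory.

        DAF_BUTLER_CACHE_DIRECTORY should override any configured root,
        while DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET should only be consulted
        when no root has been configured.
        """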

        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """

        root = ResourcePath(self.root, forceDirectory=True)
        env_dir = root.join("somewhere", forceDirectory=True)
        elsewhere = root.join("elsewhere", forceDirectory=True)

        # Environment variable should override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # This environment variable should not override the config value.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, root)

        # Now use a config with no cache root set.
        config_str = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false
        """
        cache_manager = self._make_cache_manager(config_str)

        # With no root configured, the fallback environment variable should
        # now provide the cache directory.
        with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # If both environment variables are set the main (not IF_UNSET)
        # variable should win.
        with unittest.mock.patch.dict(
            os.environ,
            {
                "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath,
                "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath,
            },
        ):
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, env_dir)

        # Use the API to set the fallback environment variable, making sure
        # that the variable is reset on exit.
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            cache_manager = self._make_cache_manager(config_str)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

        # Now create the cache manager ahead of time and set the fallback
        # later.
        cache_manager = self._make_cache_manager(config_str)
        self.assertIsNone(cache_manager._cache_directory)
        with unittest.mock.patch.dict(
            os.environ,
            {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""},
        ):
            defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset()
            self.assertTrue(defined)
            self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True))

    def testExplicitCacheDir(self) -> None:
        config_str = f"""
cached:
  root: '{self.root}'
  cacheable:
    metric0: true
        """
        cache_manager = self._make_cache_manager(config_str)

        # Look inside to check we do have a cache directory.
        self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True))

        self.assertCache(cache_manager)

        # Test that the cache directory is not marked temporary
        self.assertFalse(cache_manager.cache_directory.isTemporary)

    def assertCache(self, cache_manager: DatastoreCacheManager) -> None:
        self.assertTrue(cache_manager.should_be_cached(self.refs[0]))
        self.assertFalse(cache_manager.should_be_cached(self.refs[1]))

        uri = cache_manager.move_to_cache(self.files[0], self.refs[0])
        self.assertIsInstance(uri, ResourcePath)
        self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1]))

        # Check presence in cache using ref and then using file extension.
        self.assertFalse(cache_manager.known_to_cache(self.refs[1]))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0]))
        self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension()))
        self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension()))

        # Cached file should no longer exist but uncached file should be
        # unaffected.
        self.assertFalse(self.files[0].exists())
        self.assertTrue(self.files[1].exists())

        # Should find this file and it should be within the cache directory.
        with cache_manager.find_in_cache(self.refs[0], ".txt") as found:
            self.assertTrue(found.exists())
            self.assertIsNotNone(found.relative_to(cache_manager.cache_directory))

        # Should not be able to find these in cache
        with cache_manager.find_in_cache(self.refs[0], ".fits") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[1], ".fits") as found:
            self.assertIsNone(found)

    def testNoCache(self) -> None:
        cache_manager = DatastoreDisabledCacheManager("", universe=self.universe)
        for uri, ref in zip(self.files, self.refs, strict=True):
            self.assertFalse(cache_manager.should_be_cached(ref))
            self.assertIsNone(cache_manager.move_to_cache(uri, ref))
            self.assertFalse(cache_manager.known_to_cache(ref))
            with cache_manager.find_in_cache(ref, ".txt") as found:
                self.assertIsNone(found, msg=f"{cache_manager}")

    def _expiration_config(self, mode: str, threshold: int) -> str:
        return f"""
cached:
  default: true
  expiry:
    mode: {mode}
    threshold: {threshold}
  cacheable:
    unused: true
        """

    def testCacheExpiryFiles(self) -> None:
        threshold = 2  # Keep at least 2 files.
        mode = "files"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)

        # Check that an empty cache returns unknown for arbitrary ref
        self.assertFalse(cache_manager.known_to_cache(self.refs[0]))

        # Should end with datasets: 2, 3, 4
        self.assertExpiration(cache_manager, 5, threshold + 1)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Check that we will not expire a file that is actively in use.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNotNone(found)

            # Trigger cache expiration that should remove the file
            # we just retrieved. Should now have: 3, 4, 5
            cached = cache_manager.move_to_cache(self.files[5], self.refs[5])
            self.assertIsNotNone(cached)

            # Cache should still report the standard file count.
            self.assertEqual(cache_manager.file_count, threshold + 1)

            # Add additional entry to cache.
            # Should now have 4, 5, 6
            cached = cache_manager.move_to_cache(self.files[6], self.refs[6])
            self.assertIsNotNone(cached)

            # Is the file still there?
            self.assertTrue(found.exists())

            # Can we read it?
            data = found.read()
            self.assertGreater(len(data), 0)

        # Outside context the file should no longer exist.
        self.assertFalse(found.exists())

        # File count should not have changed.
        self.assertEqual(cache_manager.file_count, threshold + 1)

        # Dataset 2 was in the exempt directory but because hardlinks
        # are used it was deleted from the main cache during cache expiry
        # above and so should no longer be found.
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

        # And the one stored after it is also gone.
        with cache_manager.find_in_cache(self.refs[3], ".txt") as found:
            self.assertIsNone(found)

        # But dataset 4 is present.
        with cache_manager.find_in_cache(self.refs[4], ".txt") as found:
            self.assertIsNotNone(found)

        # Adding a new dataset to the cache should now delete it.
        cache_manager.move_to_cache(self.files[7], self.refs[7])

        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsNone(found)

1705 

1706 def testCacheExpiryDatasets(self) -> None: 

1707 threshold = 2 # Keep 2 datasets. 

1708 mode = "datasets" 

1709 config_str = self._expiration_config(mode, threshold) 

1710 

1711 cache_manager = self._make_cache_manager(config_str) 

1712 self.assertExpiration(cache_manager, 5, threshold + 1) 

1713 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1714 

1715 def testCacheExpiryDatasetsComposite(self) -> None: 

1716 threshold = 2 # Keep 2 datasets. 

1717 mode = "datasets" 

1718 config_str = self._expiration_config(mode, threshold) 

1719 

1720 cache_manager = self._make_cache_manager(config_str) 

1721 

1722 n_datasets = 3 

1723 for i in range(n_datasets): 

1724 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True): 

1725 cached = cache_manager.move_to_cache(component_file, component_ref) 

1726 self.assertIsNotNone(cached) 

1727 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1728 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1729 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1730 

1731 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 
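
        # In "datasets" mode expiry counts whole datasets rather than files,
        # so the three component files of a composite are retained or
        # expired together.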

        # Write two new non-composite datasets and the number of files
        # should drop.
        self.assertExpiration(cache_manager, 2, 5)

    def testCacheExpirySize(self) -> None:
        threshold = 55  # Each file is 10 bytes
        mode = "size"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertExpiration(cache_manager, 10, 6)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

    def assertExpiration(
        self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int
    ) -> None:
        """Insert the datasets and then check the number retained."""

        for i in range(n_datasets):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        self.assertEqual(cache_manager.file_count, n_retained)

        # The oldest files should no longer be in the cache.
        for i in range(n_datasets):
            with cache_manager.find_in_cache(self.refs[i], ".txt") as found:
                if i >= n_datasets - n_retained:
                    self.assertIsInstance(found, ResourcePath)
                else:
                    self.assertIsNone(found)

    def testCacheExpiryAge(self) -> None:
        threshold = 1  # Expire files older than 1 second.
        mode = "age"
        config_str = self._expiration_config(mode, threshold)

        cache_manager = self._make_cache_manager(config_str)
        self.assertIn(f"{mode}={threshold}", str(cache_manager))

        # Insert 2 files, then sleep, then insert 4 more.
        for i in range(2):
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)
        time.sleep(2.0)
        for j in range(4):
            i = 2 + j  # Continue the counting
            cached = cache_manager.move_to_cache(self.files[i], self.refs[i])
            self.assertIsNotNone(cached)

        # Only the files written after the sleep should exist.
        self.assertEqual(cache_manager.file_count, 4)
        with cache_manager.find_in_cache(self.refs[1], ".txt") as found:
            self.assertIsNone(found)
        with cache_manager.find_in_cache(self.refs[2], ".txt") as found:
            self.assertIsInstance(found, ResourcePath)


class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the null datastore."""

    storageClassFactory = StorageClassFactory()

    def test_basics(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        null = NullDatastore(None, None)
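
        # A null datastore never stores anything: existence and knowledge
        # checks return False, reads raise FileNotFoundError, and the
        # remaining operations raise NotImplementedError.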

        self.assertFalse(null.exists(ref))
        self.assertFalse(null.knows(ref))
        knows = null.knows_these([ref])
        self.assertFalse(knows[ref])
        null.validateConfiguration(ref)

        with self.assertRaises(FileNotFoundError):
            null.get(ref)
        with self.assertRaises(NotImplementedError):
            null.put("", ref)
        with self.assertRaises(FileNotFoundError):
            null.getURI(ref)
        with self.assertRaises(FileNotFoundError):
            null.getURIs(ref)
        with self.assertRaises(FileNotFoundError):
            null.getManyURIs([ref])
        with self.assertRaises(NotImplementedError):
            null.getLookupKeys()
        with self.assertRaises(NotImplementedError):
            null.import_records({})
        with self.assertRaises(NotImplementedError):
            null.export_records([])
        with self.assertRaises(NotImplementedError):
            null.export([ref])
        with self.assertRaises(NotImplementedError):
            null.transfer(null, ref)
        with self.assertRaises(NotImplementedError):
            null.emptyTrash()
        with self.assertRaises(NotImplementedError):
            null.trash(ref)
        with self.assertRaises(NotImplementedError):
            null.forget([ref])
        with self.assertRaises(NotImplementedError):
            null.remove(ref)
        with self.assertRaises(NotImplementedError):
            null.retrieveArtifacts([ref], ResourcePath("."))
        with self.assertRaises(NotImplementedError):
            null.transfer_from(null, [ref])
        with self.assertRaises(NotImplementedError):
            null.ingest()


class DatasetRefURIsTestCase(unittest.TestCase):
    """Tests for DatasetRefURIs."""

    def testSequenceAccess(self) -> None:
        """Verify that DatasetRefURIs can be treated like a two-item tuple."""
        uris = DatasetRefURIs()

        self.assertEqual(len(uris), 2)
        self.assertEqual(uris[0], None)
        self.assertEqual(uris[1], {})

        primaryURI = ResourcePath("1/2/3")
        componentURI = ResourcePath("a/b/c")

        # Affirm that DatasetRefURIs does not support MutableSequence
        # functions.
        with self.assertRaises(TypeError):
            uris[0] = primaryURI
        with self.assertRaises(TypeError):
            uris[1] = {"foo": componentURI}

        # But DatasetRefURIs can be set by property name:
        uris.primaryURI = primaryURI
        uris.componentURIs = {"foo": componentURI}
        self.assertEqual(uris.primaryURI, primaryURI)
        self.assertEqual(uris[0], primaryURI)
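
        # DatasetRefURIs also supports tuple unpacking via its sequence
        # behavior, as demonstrated below.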

        primary, components = uris
        self.assertEqual(primary, primaryURI)
        self.assertEqual(components, {"foo": componentURI})

    def testRepr(self) -> None:
        """Verify __repr__ output."""
        uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")})
        self.assertEqual(
            repr(uris),
            'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})',
        )


class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase):
    """Test the StoredFileInfo class."""

    storageClassFactory = StorageClassFactory()

    def test_StoredFileInfo(self) -> None:
        storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
        ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})

        record = dict(
            storage_class="StructuredDataDict",
            formatter="lsst.daf.butler.Formatter",
            path="a/b/c.txt",
            component="component",
            dataset_id=ref.id,
            checksum=None,
            file_size=5,
        )
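
        # This record matches the layout produced by to_record(), so the
        # round trip below can be compared for exact equality.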

        info = StoredFileInfo.from_record(record)

        self.assertEqual(info.dataset_id, ref.id)
        self.assertEqual(info.to_record(), record)

        ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {})
        rebased = info.rebase(ref2)
        self.assertEqual(rebased.dataset_id, ref2.id)
        self.assertEqual(rebased.rebase(ref), info)

        with self.assertRaises(TypeError):
            rebased.update(formatter=42)

        with self.assertRaises(ValueError):
            rebased.update(something=42, new="42")

        # Check that pickle works on StoredFileInfo.
        pickled_info = pickle.dumps(info)
        unpickled_info = pickle.loads(pickled_info)
        self.assertEqual(unpickled_info, info)


if __name__ == "__main__":
    unittest.main()