Coverage for tests/test_datastore.py: 11% (1060 statements)


# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NullDatastore,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make an example dataset that can be stored in a butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
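
# For orientation: the helper above builds a composite metrics object whose
# positional arguments correspond to (summary, output, data), matching the
# keyword form used later in this file, e.g.
#
#     MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
#
# This is an illustrative sketch only; the full MetricsExample definition
# lives in lsst.daf.butler.tests.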

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage Classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)
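
# Concrete subclasses supply class attributes consumed by the helpers above
# and the tests below -- at minimum ``configFile``, plus flags such as
# ``uriScheme`` and ``ingestTransferModes``. A minimal sketch, mirroring
# PosixDatastoreTestCase further down in this file:
#
#     class MyDatastoreTestCase(DatastoreTests, unittest.TestCase):
#         configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
#         uriScheme = "file"
#         ingestTransferModes = ("copy", "move", "auto")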

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that read parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})
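
    # Read parameters are checked at get() time against the parameters the
    # storage class declares, so an unrecognized key fails fast with KeyError
    # rather than being silently ignored. Illustrative pattern (not executed
    # here; "slice" is a parameter used by other tests in this file):
    #
    #     datastore.get(ref, parameters={"slice": slice(4)})   # known parameter
    #     datastore.get(ref, parameters={"missing": 5})        # raises KeyError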

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # Non-existent file.
            datastore.get(ref)

        # Get a URI from it.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)
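
    # Convention exercised above: when a datastore is asked to *predict* a
    # location it returns a normal URI with a "predicted" fragment appended
    # (e.g. "...#predicted"), so callers can tell a guessed location from a
    # real one. Illustrative check only, not executed here:
    #
    #     uri = datastore.getURI(ref, predict=True)
    #     assert uri.fragment == "predicted"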

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that the registry knows nothing
        about.
        """
        datastore = self.makeDatastore()

        # Skip the test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        i = 0
        for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"):
            i += 1
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            if sc_name == "StructuredComposite":
                disassembled = True
            else:
                disassembled = False

            # Start the datastore in its default configuration of using the
            # registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so only the file remains.
            datastore.removeStoredItemInfo(ref)

            # While still not trusting, check that things break.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI lookup should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for a compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self) -> None:
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a failure in this subtest does not trigger
                # a cascade of failures because of file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read-only
                # component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
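
    # The flattened tuple returned above is unpacked by callers in two ways,
    # matching the number of refs requested (illustrative, taken from the
    # tests below):
    #
    #     datastore, ref = self.prepDeleteTest()
    #     datastore, *refs = self.prepDeleteTest(n_refs=10)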

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"}
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existent file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)
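
    # The transaction pattern exercised above: puts made inside a
    # ``datastore.transaction()`` block are kept if the block exits cleanly
    # and rolled back if an exception escapes it. Illustrative sketch only,
    # not executed here:
    #
    #     with datastore.transaction():
    #         datastore.put(obj, ref)
    #         raise RuntimeError("roll back")   # afterwards, ref no longer exists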

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but then the new symlink will fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)
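
    # Each ingest test below supplies a callback with the signature
    # ``func(metrics, path, ref)``, which runIngestTest invokes with a freshly
    # written YAML file. Illustrative only:
    #
    #     def succeed(obj, path, ref):
    #         datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
    #
    #     self.runIngestTest(succeed)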

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores support "auto" but cannot do an in-place
            # transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of the datastore root unless
                    the mode is "auto".
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for "auto" mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)
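
    # The transfer modes exercised above come from lsst.resources. As a rough
    # guide (assumed semantics, not verified here): "copy" duplicates the file
    # into the datastore, "move" relocates it, "hardlink"/"symlink"/"relsymlink"
    # create hard, absolute-symbolic, or relative-symbolic links, "link" tries
    # a hard link with a fallback, and "auto" lets the datastore choose an
    # appropriate mode.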

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingesting a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self) -> None:
        """Test the export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

        # Check that subsetting works; include a non-existent dataset ID.
        dataset_ids = {exported_refs[0].id, uuid.uuid4()}
        subset = record_data.subset(dataset_ids)
        assert subset is not None
        self.assertEqual(len(subset.records), 1)
        subset = record_data.subset({uuid.uuid4()})
        self.assertIsNone(subset)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None.
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)
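
    # Pattern shown above: ``DatasetRef.overrideStorageClass`` returns a new
    # ref whose get() converts the stored object to the requested Python type,
    # so the same stored file can be read back as either the model or a plain
    # dict. Illustrative only:
    #
    #     loaded = datastore.get(ref.overrideStorageClass("NativeDictForConvertibleModel"))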

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # The configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove the dataset and put it back, but with checksums enabled
        # explicitly.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record, after which trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiple refs at once in trust mode.
        subset = [refs.pop() for i in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write cleans up the partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails before writing and one that fails
        # after leaving a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey-patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of Datastores."""

    def testConstraints(self) -> None:
        """Test the constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore and constraints
    at the ChainedDatastore level.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral currently means InMemory, and that
                            # does not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))


class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for the datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls) -> None:
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self) -> None:
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files.
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        # Create a list of refs and a list of temporary files.
        n_datasets = 10
        self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

1466 def tearDown(self) -> None: 

1467 if self.root is not None and os.path.exists(self.root): 

1468 shutil.rmtree(self.root, ignore_errors=True) 

1469 

1470 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1471 config = Config.fromYaml(config_str) 

1472 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1473 

1474 def testNoCacheDir(self) -> None: 

1475 config_str = """ 

1476cached: 

1477 root: null 

1478 cacheable: 

1479 metric0: true 

1480 """ 

1481 cache_manager = self._make_cache_manager(config_str) 

1482 

1483 # Look inside to check we don't have a cache directory 

1484 self.assertIsNone(cache_manager._cache_directory) 

1485 

1486 self.assertCache(cache_manager) 

1487 

1488 # Test that the cache directory is marked temporary 

1489 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1490 

1491 def testNoCacheDirReversed(self) -> None: 

1492 """Use default caching status and metric1 to false""" 

1493 config_str = """ 

1494cached: 

1495 root: null 

1496 default: true 

1497 cacheable: 

1498 metric1: false 

1499 """ 

1500 cache_manager = self._make_cache_manager(config_str) 

1501 

1502 self.assertCache(cache_manager) 

1503 

1504 def testEnvvarCacheDir(self) -> None: 

1505 config_str = f""" 

1506cached: 

1507 root: '{self.root}' 

1508 cacheable: 

1509 metric0: true 

1510 """ 

1511 

1512 root = ResourcePath(self.root, forceDirectory=True) 

1513 env_dir = root.join("somewhere", forceDirectory=True) 

1514 elsewhere = root.join("elsewhere", forceDirectory=True) 

1515 

1516 # Environment variable should override the config value. 

1517 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1518 cache_manager = self._make_cache_manager(config_str) 

1519 self.assertEqual(cache_manager.cache_directory, env_dir) 

1520 

1521 # This environment variable should not override the config value. 

1522 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1523 cache_manager = self._make_cache_manager(config_str) 

1524 self.assertEqual(cache_manager.cache_directory, root) 

1525 

1526 # Now a config with no cache root set. 

1527 config_str = """ 

1528cached: 

1529 root: null 

1530 default: true 

1531 cacheable: 

1532 metric1: false 

1533 """ 

1534 cache_manager = self._make_cache_manager(config_str) 

1535 

1536 # This environment variable should override the config value. 

1537 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1538 cache_manager = self._make_cache_manager(config_str) 

1539 self.assertEqual(cache_manager.cache_directory, env_dir) 

1540 

1541 # If both environment variables are set the main (not IF_UNSET) 

1542 # variable should win. 

1543 with unittest.mock.patch.dict( 

1544 os.environ, 

1545 { 

1546 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1547 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1548 }, 

1549 ): 

1550 cache_manager = self._make_cache_manager(config_str) 

1551 self.assertEqual(cache_manager.cache_directory, env_dir) 

1552 

1553 # Use the API to set the environment variable, making sure that the 

1554 # variable is reset on exit. 

1555 with unittest.mock.patch.dict( 

1556 os.environ, 

1557 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1558 ): 

1559 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1560 self.assertTrue(defined) 

1561 cache_manager = self._make_cache_manager(config_str) 

1562 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1563 

1564 # Now create the cache manager ahead of time and set the fallback 

1565 # later. 

1566 cache_manager = self._make_cache_manager(config_str) 

1567 self.assertIsNone(cache_manager._cache_directory) 

1568 with unittest.mock.patch.dict( 

1569 os.environ, 

1570 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1571 ): 

1572 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1573 self.assertTrue(defined) 

1574 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1575 
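# A minimal sketch of the precedence asserted above, kept in a comment so the
# helper is clearly hypothetical and not daf_butler API:
#
#     def resolve_cache_root(config_root: str | None) -> str | None:
#         if env := os.environ.get("DAF_BUTLER_CACHE_DIRECTORY"):
#             return env  # always wins
#         if config_root is not None:
#             return config_root  # explicit config beats IF_UNSET
#         return os.environ.get("DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET")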

1576 def testExplicitCacheDir(self) -> None: 

1577 config_str = f""" 

1578cached: 

1579 root: '{self.root}' 

1580 cacheable: 

1581 metric0: true 

1582 """ 

1583 cache_manager = self._make_cache_manager(config_str) 

1584 

1585 # Look inside to check we do have a cache directory. 

1586 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1587 

1588 self.assertCache(cache_manager) 

1589 

1590 # Test that the cache directory is not marked temporary 

1591 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1592 

1593 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1594 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1595 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1596 

1597 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1598 self.assertIsInstance(uri, ResourcePath) 

1599 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1600 

1601 # Check presence in cache using ref and then using file extension. 

1602 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1603 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1604 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1605 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1606 

1607 # Cached file should no longer exist but uncached file should be 

1608 # unaffected. 

1609 self.assertFalse(self.files[0].exists()) 

1610 self.assertTrue(self.files[1].exists()) 

1611 

1612 # Should find this file and it should be within the cache directory. 

1613 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1614 self.assertTrue(found.exists()) 

1615 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1616 

1617 # Should not be able to find these in cache 

1618 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1619 self.assertIsNone(found) 

1620 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1621 self.assertIsNone(found) 

1622 

1623 def testNoCache(self) -> None: 

1624 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1625 for uri, ref in zip(self.files, self.refs, strict=True): 

1626 self.assertFalse(cache_manager.should_be_cached(ref)) 

1627 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1628 self.assertFalse(cache_manager.known_to_cache(ref)) 

1629 with cache_manager.find_in_cache(ref, ".txt") as found: 

1630 self.assertIsNone(found, msg=f"{cache_manager}") 

1631 

1632 def _expiration_config(self, mode: str, threshold: int) -> str: 

1633 return f""" 

1634cached: 

1635 default: true 

1636 expiry: 

1637 mode: {mode} 

1638 threshold: {threshold} 

1639 cacheable: 

1640 unused: true 

1641 """ 

1642 
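# Threshold units implied by the expiry assertions below (an illustrative
# summary drawn from these tests, not exhaustive documentation):
#   files    - number of files to retain
#   datasets - number of datasets to retain
#   size     - total cache size in bytes
#   age      - file age in seconds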

1643 def testCacheExpiryFiles(self) -> None: 

1644 threshold = 2 # Keep at least 2 files. 

1645 mode = "files" 

1646 config_str = self._expiration_config(mode, threshold) 

1647 

1648 cache_manager = self._make_cache_manager(config_str) 

1649 

1650 # Check that an empty cache reports an arbitrary ref as unknown. 

1651 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1652 

1653 # Should end with datasets: 2, 3, 4 

1654 self.assertExpiration(cache_manager, 5, threshold + 1) 

1655 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1656 

1657 # Check that we will not expire a file that is actively in use. 

1658 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1659 self.assertIsNotNone(found) 

1660 

1661 # Trigger cache expiration that should remove the file 

1662 # we just retrieved. Should now have: 3, 4, 5 

1663 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1664 self.assertIsNotNone(cached) 

1665 

1666 # Cache should still report the standard file count. 

1667 self.assertEqual(cache_manager.file_count, threshold + 1) 

1668 

1669 # Add additional entry to cache. 

1670 # Should now have 4, 5, 6 

1671 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1672 self.assertIsNotNone(cached) 

1673 

1674 # Is the file still there? 

1675 self.assertTrue(found.exists()) 

1676 

1677 # Can we read it? 

1678 data = found.read() 

1679 self.assertGreater(len(data), 0) 

1680 

1681 # Outside context the file should no longer exist. 

1682 self.assertFalse(found.exists()) 

1683 

1684 # File count should not have changed. 

1685 self.assertEqual(cache_manager.file_count, threshold + 1) 

1686 

1687 # Dataset 2 was in the exempt directory but because hardlinks 

1688 # are used it was deleted from the main cache during cache expiry 

1689 # above and so should no longer be found. 

1690 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1691 self.assertIsNone(found) 

1692 

1693 # And the one stored after it is also gone. 

1694 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1695 self.assertIsNone(found) 

1696 

1697 # But dataset 4 is present. 

1698 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1699 self.assertIsNotNone(found) 

1700 

1701 # Adding a new dataset to the cache should now delete it. 

1702 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1703 

1704 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1705 self.assertIsNone(found) 

1706 

1707 def testCacheExpiryDatasets(self) -> None: 

1708 threshold = 2 # Keep 2 datasets. 

1709 mode = "datasets" 

1710 config_str = self._expiration_config(mode, threshold) 

1711 

1712 cache_manager = self._make_cache_manager(config_str) 

1713 self.assertExpiration(cache_manager, 5, threshold + 1) 

1714 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1715 

1716 def testCacheExpiryDatasetsComposite(self) -> None: 

1717 threshold = 2 # Keep 2 datasets. 

1718 mode = "datasets" 

1719 config_str = self._expiration_config(mode, threshold) 

1720 

1721 cache_manager = self._make_cache_manager(config_str) 

1722 

1723 n_datasets = 3 

1724 for i in range(n_datasets): 

1725 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True): 

1726 cached = cache_manager.move_to_cache(component_file, component_ref) 

1727 self.assertIsNotNone(cached) 

1728 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1729 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1730 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1731 

1732 self.assertEqual(cache_manager.file_count, 6) # 2 datasets retained, each with 3 files 

1733 

1734 # Write two new non-composite datasets; the file count should drop. 

1735 self.assertExpiration(cache_manager, 2, 5) 

1736 

1737 def testCacheExpirySize(self) -> None: 

1738 threshold = 55 # Each file is 10 bytes 

1739 mode = "size" 

1740 config_str = self._expiration_config(mode, threshold) 

1741 

1742 cache_manager = self._make_cache_manager(config_str) 

1743 self.assertExpiration(cache_manager, 10, 6) 

1744 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1745 

1746 def assertExpiration( 

1747 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1748 ) -> None: 

1749 """Insert the datasets and then check the number retained.""" 

1750 for i in range(n_datasets): 

1751 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1752 self.assertIsNotNone(cached) 

1753 

1754 self.assertEqual(cache_manager.file_count, n_retained) 

1755 

1756 # The oldest files should no longer be in the cache. 

1757 for i in range(n_datasets): 

1758 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1759 if i >= n_datasets - n_retained: 

1760 self.assertIsInstance(found, ResourcePath) 

1761 else: 

1762 self.assertIsNone(found) 

1763 

1764 def testCacheExpiryAge(self) -> None: 

1765 threshold = 1 # Expire files older than 1 second. 

1766 mode = "age" 

1767 config_str = self._expiration_config(mode, threshold) 

1768 

1769 cache_manager = self._make_cache_manager(config_str) 

1770 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1771 

1772 # Insert 2 files, then sleep, then insert 4 more. 

1773 for i in range(2): 

1774 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1775 self.assertIsNotNone(cached) 

1776 time.sleep(2.0) 

1777 for j in range(4): 

1778 i = 2 + j # Continue counting from the first batch 

1779 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1780 self.assertIsNotNone(cached) 

1781 

1782 # Only the files written after the sleep should exist. 

1783 self.assertEqual(cache_manager.file_count, 4) 

1784 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1785 self.assertIsNone(found) 

1786 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1787 self.assertIsInstance(found, ResourcePath) 

1788 

1789 
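# Illustrative usage of the caching calls exercised above; a sketch assuming
# `cache_manager`, `uri`, and `ref` as in assertCache (the helper itself is
# hypothetical).
def _demo_cache_round_trip(
    cache_manager: DatastoreCacheManager, uri: ResourcePath, ref: DatasetRef
) -> None:
    """Move a file into the cache and read it back while it is protected."""
    cached = cache_manager.move_to_cache(uri, ref)  # None if ref is not cacheable
    if cached is not None:
        # find_in_cache is a context manager: the cached file is exempt from
        # expiry only while the context is held.
        with cache_manager.find_in_cache(ref, uri.getExtension()) as found:
            if found is not None:
                assert len(found.read()) > 0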

1790class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): 

1791 """Test the null datastore.""" 

1792 

1793 storageClassFactory = StorageClassFactory() 

1794 

1795 def test_basics(self) -> None: 

1796 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1797 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1798 

1799 null = NullDatastore(None, None) 

1800 

1801 self.assertFalse(null.exists(ref)) 

1802 self.assertFalse(null.knows(ref)) 

1803 knows = null.knows_these([ref]) 

1804 self.assertFalse(knows[ref]) 

1805 null.validateConfiguration(ref) 

1806 

1807 with self.assertRaises(FileNotFoundError): 

1808 null.get(ref) 

1809 with self.assertRaises(NotImplementedError): 

1810 null.put("", ref) 

1811 with self.assertRaises(FileNotFoundError): 

1812 null.getURI(ref) 

1813 with self.assertRaises(FileNotFoundError): 

1814 null.getURIs(ref) 

1815 with self.assertRaises(FileNotFoundError): 

1816 null.getManyURIs([ref]) 

1817 with self.assertRaises(NotImplementedError): 

1818 null.getLookupKeys() 

1819 with self.assertRaises(NotImplementedError): 

1820 null.import_records({}) 

1821 with self.assertRaises(NotImplementedError): 

1822 null.export_records([]) 

1823 with self.assertRaises(NotImplementedError): 

1824 null.export([ref]) 

1825 with self.assertRaises(NotImplementedError): 

1826 null.transfer(null, ref) 

1827 with self.assertRaises(NotImplementedError): 

1828 null.emptyTrash() 

1829 with self.assertRaises(NotImplementedError): 

1830 null.trash(ref) 

1831 with self.assertRaises(NotImplementedError): 

1832 null.forget([ref]) 

1833 with self.assertRaises(NotImplementedError): 

1834 null.remove(ref) 

1835 with self.assertRaises(NotImplementedError): 

1836 null.retrieveArtifacts([ref], ResourcePath(".")) 

1837 with self.assertRaises(NotImplementedError): 

1838 null.transfer_from(null, [ref]) 

1839 with self.assertRaises(NotImplementedError): 

1840 null.ingest() 

1841 

1842 
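# A sketch of the split asserted in NullDatastoreTestCase above (hypothetical
# helper; assumes `null` and `ref` as in test_basics): read-style calls behave
# as if nothing is stored, while mutating calls are unsupported outright.
def _probe_null_datastore(null: NullDatastore, ref: DatasetRef) -> None:
    """Demonstrate the two failure modes of a NullDatastore."""
    assert not null.exists(ref)
    try:
        null.get(ref)
    except FileNotFoundError:
        pass  # reads report the dataset as missing
    try:
        null.put("", ref)
    except NotImplementedError:
        pass  # writes are never supported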

1843class DatasetRefURIsTestCase(unittest.TestCase): 

1844 """Tests for DatasetRefURIs.""" 

1845 

1846 def testSequenceAccess(self) -> None: 

1847 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1848 uris = DatasetRefURIs() 

1849 

1850 self.assertEqual(len(uris), 2) 

1851 self.assertEqual(uris[0], None) 

1852 self.assertEqual(uris[1], {}) 

1853 

1854 primaryURI = ResourcePath("1/2/3") 

1855 componentURI = ResourcePath("a/b/c") 

1856 

1857 # Affirm that DatasetRefURIs does not support MutableSequence mutation. 

1858 with self.assertRaises(TypeError): 

1859 uris[0] = primaryURI 

1860 with self.assertRaises(TypeError): 

1861 uris[1] = {"foo": componentURI} 

1862 

1863 # But the URIs can be set by property name: 

1864 uris.primaryURI = primaryURI 

1865 uris.componentURIs = {"foo": componentURI} 

1866 self.assertEqual(uris.primaryURI, primaryURI) 

1867 self.assertEqual(uris[0], primaryURI) 

1868 

1869 primary, components = uris 

1870 self.assertEqual(primary, primaryURI) 

1871 self.assertEqual(components, {"foo": componentURI}) 

1872 

1873 def testRepr(self) -> None: 

1874 """Verify __repr__ output.""" 

1875 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1876 self.assertEqual( 

1877 repr(uris), 

1878 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1879 ) 

1880 

1881 
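# DatasetRefURIs acts as a read-only two-item sequence, per the tests above;
# a sketch of idiomatic unpacking (the helper is hypothetical):
def _split_uris(uris: DatasetRefURIs) -> tuple[ResourcePath | None, dict[str, ResourcePath]]:
    """Unpack a DatasetRefURIs into its primary and component URIs."""
    primary, components = uris  # tuple-style unpacking is supported
    return primary, components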

1882class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1883 """Test the StoredFileInfo class.""" 

1884 

1885 storageClassFactory = StorageClassFactory() 

1886 

1887 def test_StoredFileInfo(self) -> None: 

1888 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1889 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1890 

1891 record = dict( 

1892 storage_class="StructuredDataDict", 

1893 formatter="lsst.daf.butler.Formatter", 

1894 path="a/b/c.txt", 

1895 component="component", 

1896 dataset_id=ref.id, 

1897 checksum=None, 

1898 file_size=5, 

1899 ) 

1900 info = StoredFileInfo.from_record(record) 

1901 

1902 self.assertEqual(info.dataset_id, ref.id) 

1903 self.assertEqual(info.to_record(), record) 

1904 

1905 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}) 

1906 rebased = info.rebase(ref2) 

1907 self.assertEqual(rebased.dataset_id, ref2.id) 

1908 self.assertEqual(rebased.rebase(ref), info) 

1909 

1910 with self.assertRaises(TypeError): 

1911 rebased.update(formatter=42) 

1912 

1913 with self.assertRaises(ValueError): 

1914 rebased.update(something=42, new="42") 

1915 

1916 # Check that pickle works on StoredFileInfo. 

1917 pickled_info = pickle.dumps(info) 

1918 unpickled_info = pickle.loads(pickled_info) 

1919 self.assertEqual(unpickled_info, info) 

1920 

1921 
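# StoredFileInfo round-trips through plain dict records, as asserted above; a
# minimal sketch of that round trip (hypothetical helper):
def _round_trip_info(info: StoredFileInfo) -> StoredFileInfo:
    """Serialize a StoredFileInfo to a record and reconstruct it."""
    return StoredFileInfo.from_record(info.to_record())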

1922if __name__ == "__main__": 

1923 unittest.main()