Coverage for tests/test_datastore.py: 11% (1075 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections.abc import Callable
from typing import Any, cast

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetIdGenEnum,
    DatasetRef,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DimensionUniverse,
    FileDataset,
    StorageClass,
    StorageClassFactory,
)
from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig, DatastoreValidationError, NullDatastore
from lsst.daf.butler.datastore.cache_manager import (
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreDisabledCacheManager,
)
from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.daf.butler.tests.utils import TestCaseMixin
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


def makeExampleMetrics(use_none: bool = False) -> MetricsExample:
    """Make an example dataset that can be stored in the butler."""
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )
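

# An added illustrative sketch (not part of the original tests, and never
# called by the suite): MetricsExample is filled positionally above as
# (summary, output, data), so the helper behaves as follows.
def _example_metrics_usage() -> None:
    metrics = makeExampleMetrics()
    assert metrics.data is not None and metrics.data[0] == 563  # from the literal above
    empty = makeExampleMetrics(use_none=True)
    assert empty.data is None  # use_none=True stores None in the data slot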


class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis that could
    otherwise occur when a standard exception is used.
    """


class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin):
    """Support routines for datastore testing."""

    root: str | None = None
    universe: DimensionUniverse
    storageClassFactory: StorageClassFactory

    @classmethod
    def setUpClass(cls) -> None:
        # Storage classes are fixed for all datastores in these tests.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the datastore config so we can get the class information
        # (we should not assume the class name here, but rely on the
        # configuration file itself).
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"]))
        cls.universe = DimensionUniverse()

    def setUp(self) -> None:
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self) -> None:
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)


class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True
    rootKeys: tuple[str, ...] | None = None
    isEphemeral: bool = False
    validationCanFail: bool = False

    def testConfigRoot(self) -> None:
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self) -> None:
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self) -> None:
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self) -> None:
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self) -> None:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"}

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2)

            # Using getManyURIs without prediction before the dataset has
            # been put should raise.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Using getManyURIs with prediction before the dataset has
            # been put should predict the URIs.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with the parent ID.
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None.
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported.
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise.
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # Non-existent file.
            datastore.get(ref)

        # Get a URI from it anyway, via prediction.
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)
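
    # Added exposition (not part of the original test): URIs returned with
    # ``predict=True`` carry a ``#predicted`` fragment, which is the marker
    # the assertions above rely on, e.g.
    #
    #     uri = datastore.getURI(ref, predict=True)
    #     if uri.fragment == "predicted":
    #         ...  # location is a guess; the file need not exist yet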

    def testTrustGetRequest(self) -> None:
        """Check that we can get datasets that the registry knows nothing
        about.
        """
        datastore = self.makeDatastore()

        # Skip the test if the attribute is not defined.
        if not hasattr(datastore, "trustGetRequest"):
            self.skipTest("Datastore does not support trustGetRequest")

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start the datastore in its default configuration of using
            # the registry.
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly.
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.conform(("visit", "physical_filter"))

            dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"}

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s).
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the registry entry; without trust enabled the dataset
            # should now be invisible.
            datastore.removeStoredItemInfo(ref)

            # Confirm that things break while we are still not trusting.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # Getting a URI should fail unless we ask for prediction.
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode.
            datastore.trustGetRequest = True

            # Try again to get it.
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component.
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust, this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for a compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make a new dataset ref with a compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate the stored dataset type.
                def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name!r}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with the original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)
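
    # Added exposition: in trust mode the datastore may need the stored
    # dataset type to interpret a file it has no record of; the callback
    # above supplies it. A hypothetical registry-backed callback might look
    # like this (``my_registry`` is illustrative, not a fixture in this
    # module):
    #
    #     def _retrieve(name: str) -> DatasetType:
    #         return my_registry.getDatasetType(name)
    #
    #     datastore.set_retrieve_dataset_type_method(_retrieve)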

    def testDisassembly(self) -> None:
        """Test disassembly within the datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # The in-memory datastore does not disassemble.
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble, to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore.
        datastore = self.makeDatastore()

        # Dummy dataId.
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"}

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures caused by file clashes.
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with a read parameter.
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component.
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes, attempt to access a
                # read-only component.
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore | DatasetRef, ...]:
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs
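
    # Added note: the flat tuple returned above is meant for unpacking, as
    # the tests below do, e.g. ``datastore, ref = self.prepDeleteTest()`` or
    # ``datastore, *refs = self.prepDeleteTest(n_refs=10)``.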

    def testRemove(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Remove.
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once.
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self) -> None:
        datastore, ref = self.prepDeleteTest()

        # Forget.
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail.
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op.
        datastore.forget([ref])

        # The predicted URI should still point to the file, which forget
        # leaves in place.
        self.assertTrue(uri.exists())

    def testTransfer(self) -> None:
        metrics = makeExampleMetrics()

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"}

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist.
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not.
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist.
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist.
        for ref, _ in fail:
            # These should raise.
            with self.assertRaises(FileNotFoundError):
                # Non-existent file.
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self) -> None:
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"}
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"}
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
                    datastore.put(metrics, refInner)
                # All datasets should exist.
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist.
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone.
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)
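
    # Added exposition: the rollback behavior tested above follows the usual
    # context-manager contract, where an exception escaping the block undoes
    # every put made inside it, including nested transactions
    # (``SomeError`` below is illustrative):
    #
    #     with datastore.transaction():
    #         datastore.put(obj, ref)  # rolled back if the block raises
    #         raise SomeError("abort")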

    def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]:
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        return metrics, ref

    def runIngestTest(
        self, func: Callable[[MetricsExample, str, DatasetRef], None], expectOutput: bool = True
    ) -> None:
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This makes os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self) -> None:
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have "auto" but can't do an in-place transfer.
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file already in the datastore root."""
                    # First move it into the root, and adjust the path
                    # accordingly.
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files outside of the datastore root unless
                    the mode is "auto".
                    """
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)
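
    # A note added for exposition: the ``mode=mode`` and
    # ``datastore=datastore`` default arguments in the nested functions above
    # are the standard Python idiom for binding the current loop values at
    # definition time; without them every closure would see only the final
    # iteration's values.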

    def testIngestTransfer(self) -> None:
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Ingest a file by transferring it to the template
                    location.
                    """
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # the datastore for "auto" mode.
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(
                    obj: MetricsExample,
                    path: str,
                    ref: DatasetRef,
                    mode: str | None = mode,
                    datastore: Datastore = datastore,
                ) -> None:
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self) -> None:
        """Special test for ingest of a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of
                    # mode.
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time round the loop,
                    # since it will get the same file name in the store.
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test;
        # ChainedDatastores that only include InMemoryDatastores have to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.conform(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"}
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs
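
    # A note added for exposition: the ``for ... else`` above runs the
    # ``else`` branch only when the loop finishes without ``break``, i.e.
    # when every child datastore is in-memory, in which case the test is
    # skipped.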

    def testExportImportRecords(self) -> None:
        """Test the export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete
            # set.
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works; include a non-existent
                # dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import a relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self) -> None:
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None.
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self) -> None:
        """Test converting a dataset stored as a pydantic model into a dict
        on read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.make_empty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self) -> None:
        """Test that we can put and get a simple class with a dict()
        constructor.
        """
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self) -> None:
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self) -> None:
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self) -> None:
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.make_empty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))
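
    # Added note: the helper above writes the same object under two storage
    # class variants, f"{storageClass_root}A" and f"{storageClass_root}B"
    # (presumably defined in config/basic/storageClasses.yaml, which this
    # module loads), and checks that both round-trip to equal values.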


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self) -> None:
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self) -> None:
        """Verify that the expected exception is raised if the FileDatastore
        can not determine the put formatter location.
        """
        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId)

        def raiser(ref: DatasetRef) -> None:
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # Verify the non-composite ref execution path.
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # Verify the composite-ref execution path.
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)

    def test_roots(self) -> None:
        datastore = self.makeDatastore()

        self.assertEqual(set(datastore.names), set(datastore.roots.keys()))
        for root in datastore.roots.values():
            if root is not None:
                self.assertTrue(root.exists())


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self) -> None:
        """Ensure that checksums have not been calculated."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.conform(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"}
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Configuration should have disabled checksum calculation.
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove, then put back, but with checksums explicitly enabled.
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)

    def test_repeat_ingest(self) -> None:
        """Test that repeatedly ingesting the same file in direct mode
        is allowed.

        This test can only run with FileDatastore, since that is the only
        datastore supporting "direct" ingest.
        """
        metrics, v4ref = self._prepareIngestTest()
        datastore = self.makeDatastore()
        v5ref = DatasetRef(
            v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN
        )

        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=True) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)

            datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # This will fail because the ref is using UUIDv4.
            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct")

            # A UUIDv5 ref can be repeatedly ingested in direct mode.
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")
            datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct")

            with self.assertRaises(RuntimeError):
                datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy")
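
    # Added exposition: DATAID_TYPE_RUN derives a deterministic UUIDv5 from
    # the dataset type, data ID, and run, so repeated direct ingests of the
    # same file resolve to the same dataset; a randomly generated UUIDv4 ref
    # instead looks like a second, conflicting dataset.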


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict the trash tests to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self) -> None:
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet.
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record; trash should then do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed.
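
    # Added note: as the assertions above show, deletion here is two-phase.
    # trash() only marks a dataset; the file survives until emptyTrash()
    # runs, and with trustGetRequest enabled even record-less files can be
    # trashed.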


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    """Test that the datastore cleans up on failure."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self) -> None:
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.conform(("visit", "physical_filter"))
        dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"}

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension).
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails, and a formatter that fails but leaves
        # a file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey-patch the formatter.
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset; it should fail.
                with self.assertRaises(RuntimeError):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk.
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory.
                dir_uri = expectedUri.dirname()
                self.assertTrue(dir_uri.exists(), f"Check for existence of directory {dir_uri}")

        # Force YamlFormatter and check that this time a file is written.
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using only InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False
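

# Added note: the specializations above reuse the full DatastoreTests suite,
# varying only class-level flags (uriScheme, ingestTransferModes, isEphemeral,
# ...) and the configFile; this is how one set of tests covers POSIX,
# in-memory, and chained datastores.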


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of the constraints model of datastores."""

    def testConstraints(self) -> None:
        """Test the constraints model. Assumes that each test class has the
        same constraints.
        """
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest.
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization."""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore, with
    constraints at the ChainedDatastore level.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a PosixDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using only InMemoryDatastores."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore,
    even if a child datastore would otherwise accept the dataset.
    """

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self) -> None:
        # Override the working directory before calling the base class.
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self) -> None:
        """Test the chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.conform(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"}

        # Write empty files suitable for the ingest check (JSON and YAML
        # variants).
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose a different temp file depending on the StorageClass.
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore.
                    for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works.
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore.
                        for childDatastore, expected in zip(datastore.datastores, accept, strict=True):
                            # Ephemeral datastores currently means InMemory,
                            # and those do not accept ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest.
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

1447 
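# For orientation, a hedged sketch of the kind of configuration exercised by
# the test above. This is an illustration only, not the actual contents of
# config/basic/chainedDatastorePb.yaml; the class paths and the per-child
# ``constraints`` blocks with ``accept``/``reject`` lists of dataset type
# names are assumptions based on the behavior asserted in testConstraints.
_EXAMPLE_PER_STORE_CONSTRAINTS_YAML = """
datastore:
  cls: lsst.daf.butler.datastores.chainedDatastore.ChainedDatastore
  datastores:
    - cls: lsst.daf.butler.datastores.inMemoryDatastore.InMemoryDatastore
      constraints:
        reject:
          - metric5
    - cls: lsst.daf.butler.datastores.fileDatastore.FileDatastore
      root: <butlerRoot>
      constraints:
        accept:
          - metric
          - metric33
"""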

1448 

1449class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1450 """Tests for datastore caching infrastructure.""" 

1451 

1452 @classmethod 

1453 def setUpClass(cls) -> None: 

1454 cls.storageClassFactory = StorageClassFactory() 

1455 cls.universe = DimensionUniverse() 

1456 

1457 # Ensure that we load the test storage class definitions. 

1458 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1459 cls.storageClassFactory.addFromConfig(scConfigFile) 

1460 

1461 def setUp(self) -> None: 

1462 self.id = 0 

1463 

1464 # Create a root that we can use for caching tests. 

1465 self.root = tempfile.mkdtemp(dir=TESTDIR) 

1466 

1467 # Create some test dataset refs and associated test files 

1468 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1469 dimensions = self.universe.conform(("visit", "physical_filter")) 

1470 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1471 

1472 # Create list of refs and list of temporary files 

1473 n_datasets = 10 

1474 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1475 

1476 root_uri = ResourcePath(self.root, forceDirectory=True) 

1477 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1478 

1479 # Create test files. 

1480 for uri in self.files: 

1481 uri.write(b"0123456789") 

1482 

1483 # Create some composite refs with component files. 

1484 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1485 self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)] 

1486 self.comp_files = [] 

1487 self.comp_refs = [] 

1488 for n, ref in enumerate(self.composite_refs): 

1489 component_refs = [] 

1490 component_files = [] 

1491 for component in sc.components: 

1492 component_ref = ref.makeComponentRef(component) 

1493 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1494 component_refs.append(component_ref) 

1495 component_files.append(file) 

1496 file.write(b"9876543210") 

1497 

1498 self.comp_files.append(component_files) 

1499 self.comp_refs.append(component_refs) 

1500 

1501 def tearDown(self) -> None: 

1502 if self.root is not None and os.path.exists(self.root): 

1503 shutil.rmtree(self.root, ignore_errors=True) 

1504 

1505 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1506 config = Config.fromYaml(config_str) 

1507 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1508 

1509 def testNoCacheDir(self) -> None: 

1510 config_str = """ 

1511cached: 

1512 root: null 

1513 cacheable: 

1514 metric0: true 

1515 """ 

1516 cache_manager = self._make_cache_manager(config_str) 

1517 

1518 # Look inside to check we don't have a cache directory 

1519 self.assertIsNone(cache_manager._cache_directory) 

1520 

1521 self.assertCache(cache_manager) 

1522 

1523 # Test that the cache directory is marked temporary 

1524 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1525 

1526 def testNoCacheDirReversed(self) -> None: 

1527 """Use default caching status and metric1 to false""" 

1528 config_str = """ 

1529cached: 

1530 root: null 

1531 default: true 

1532 cacheable: 

1533 metric1: false 

1534 """ 

1535 cache_manager = self._make_cache_manager(config_str) 

1536 

1537 self.assertCache(cache_manager) 

1538 
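    # The two tests above exercise both caching polarities. As a summary
    # sketch using the same keys as the configs above: with no ``default``
    # key caching is opt-in via ``cacheable``, while ``default: true`` makes
    # it opt-out.
    _OPT_IN_CACHE_CONFIG = """
cached:
  root: null
  cacheable:
    metric0: true   # only metric0 is cached
"""

    _OPT_OUT_CACHE_CONFIG = """
cached:
  root: null
  default: true
  cacheable:
    metric1: false  # everything except metric1 is cached
"""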

1539 def testEnvvarCacheDir(self) -> None: 

1540 config_str = f""" 

1541cached: 

1542 root: '{self.root}' 

1543 cacheable: 

1544 metric0: true 

1545 """ 

1546 

1547 root = ResourcePath(self.root, forceDirectory=True) 

1548 env_dir = root.join("somewhere", forceDirectory=True) 

1549 elsewhere = root.join("elsewhere", forceDirectory=True) 

1550 

1551 # Environment variable should override the config value. 

1552 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1553 cache_manager = self._make_cache_manager(config_str) 

1554 self.assertEqual(cache_manager.cache_directory, env_dir) 

1555 

1556 # This environment variable should not override the config value. 

1557 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1558 cache_manager = self._make_cache_manager(config_str) 

1559 self.assertEqual(cache_manager.cache_directory, root) 

1560 

1561 # Now a config with no cache root set.

1562 config_str = """ 

1563cached: 

1564 root: null 

1565 default: true 

1566 cacheable: 

1567 metric1: false 

1568 """ 

1569 cache_manager = self._make_cache_manager(config_str) 

1570 

1571 # With no cache root in the config, this environment variable should now apply.

1572 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1573 cache_manager = self._make_cache_manager(config_str) 

1574 self.assertEqual(cache_manager.cache_directory, env_dir) 

1575 

1576 # If both environment variables are set, the main (not IF_UNSET)

1577 # variable should win.

1578 with unittest.mock.patch.dict( 

1579 os.environ, 

1580 { 

1581 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1582 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1583 }, 

1584 ): 

1585 cache_manager = self._make_cache_manager(config_str) 

1586 self.assertEqual(cache_manager.cache_directory, env_dir) 

1587 

1588 # Use the API to set the environment variable, making sure that the 

1589 # variable is reset on exit. 

1590 with unittest.mock.patch.dict( 

1591 os.environ, 

1592 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1593 ): 

1594 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1595 self.assertTrue(defined) 

1596 cache_manager = self._make_cache_manager(config_str) 

1597 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1598 

1599 # Now create the cache manager ahead of time and set the fallback 

1600 # later. 

1601 cache_manager = self._make_cache_manager(config_str) 

1602 self.assertIsNone(cache_manager._cache_directory) 

1603 with unittest.mock.patch.dict( 

1604 os.environ, 

1605 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1606 ): 

1607 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1608 self.assertTrue(defined) 

1609 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1610 
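    # Summarizing the precedence exercised above as a hedged sketch. The
    # real resolution happens inside DatastoreCacheManager; this helper is
    # hypothetical and exists only to restate the asserted ordering.
    @staticmethod
    def _resolve_cache_root_sketch(config_root: str | None) -> str | None:
        """Mirror the precedence asserted in testEnvvarCacheDir."""
        if env := os.environ.get("DAF_BUTLER_CACHE_DIRECTORY"):
            return env  # the main variable always wins
        if config_root is not None:
            return config_root  # an explicit config root comes next
        if fallback := os.environ.get("DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET"):
            return fallback  # only consulted when nothing else is set
        return None  # the manager creates a temporary directory on demand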

1611 def testExplicitCacheDir(self) -> None: 

1612 config_str = f""" 

1613cached: 

1614 root: '{self.root}' 

1615 cacheable: 

1616 metric0: true 

1617 """ 

1618 cache_manager = self._make_cache_manager(config_str) 

1619 

1620 # Look inside to check we do have a cache directory. 

1621 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1622 

1623 self.assertCache(cache_manager) 

1624 

1625 # Test that the cache directory is not marked temporary 

1626 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1627 

1628 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1629 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1630 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1631 

1632 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1633 self.assertIsInstance(uri, ResourcePath) 

1634 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1635 

1636 # Check presence in cache using ref and then using file extension. 

1637 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1638 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1639 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1640 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1641 

1642 # Cached file should no longer exist but uncached file should be 

1643 # unaffected. 

1644 self.assertFalse(self.files[0].exists()) 

1645 self.assertTrue(self.files[1].exists()) 

1646 

1647 # Should find this file and it should be within the cache directory. 

1648 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1649 self.assertTrue(found.exists()) 

1650 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1651 

1652 # Should not be able to find these in cache 

1653 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1654 self.assertIsNone(found) 

1655 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1656 self.assertIsNone(found) 

1657 
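    # A hedged sketch of how a datastore might drive the API checked by
    # assertCache, using only methods exercised in these tests; the control
    # flow is illustrative, not code from a real datastore implementation.
    @staticmethod
    def _cached_read_sketch(
        cache: DatastoreCacheManager, ref: DatasetRef, uri: ResourcePath
    ) -> bytes:
        # find_in_cache is a context manager so that the entry cannot be
        # expired while it is being read (see testCacheExpiryFiles below).
        with cache.find_in_cache(ref, uri.getExtension()) as cached:
            if cached is not None:
                return cached.read()
        return uri.read()  # fall back to the original location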

1658 def testNoCache(self) -> None: 

1659 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1660 for uri, ref in zip(self.files, self.refs, strict=True): 

1661 self.assertFalse(cache_manager.should_be_cached(ref)) 

1662 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1663 self.assertFalse(cache_manager.known_to_cache(ref)) 

1664 with cache_manager.find_in_cache(ref, ".txt") as found: 

1665 self.assertIsNone(found, msg=f"{cache_manager}") 

1666 

1667 def _expiration_config(self, mode: str, threshold: int) -> str: 

1668 return f""" 

1669cached: 

1670 default: true 

1671 expiry: 

1672 mode: {mode} 

1673 threshold: {threshold} 

1674 cacheable: 

1675 unused: true 

1676 """ 

1677 
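    # For reference, _expiration_config("files", 2), as used by the next
    # test, renders to:
    #
    #   cached:
    #     default: true
    #     expiry:
    #       mode: files
    #       threshold: 2
    #     cacheable:
    #       unused: true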

1678 def testCacheExpiryFiles(self) -> None: 

1679 threshold = 2 # Keep at least 2 files. 

1680 mode = "files" 

1681 config_str = self._expiration_config(mode, threshold) 

1682 

1683 cache_manager = self._make_cache_manager(config_str) 

1684 

1685 # Check that an empty cache returns unknown for arbitrary ref 

1686 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1687 

1688 # Should end with datasets: 2, 3, 4 

1689 self.assertExpiration(cache_manager, 5, threshold + 1) 

1690 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1691 

1692 # Check that we will not expire a file that is actively in use. 

1693 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1694 self.assertIsNotNone(found) 

1695 

1696 # Trigger cache expiration that should remove the file 

1697 # we just retrieved. Should now have: 3, 4, 5 

1698 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1699 self.assertIsNotNone(cached) 

1700 

1701 # Cache should still report the same file count.

1702 self.assertEqual(cache_manager.file_count, threshold + 1) 

1703 

1704 # Add additional entry to cache. 

1705 # Should now have 4, 5, 6 

1706 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1707 self.assertIsNotNone(cached) 

1708 

1709 # Is the file still there? 

1710 self.assertTrue(found.exists()) 

1711 

1712 # Can we read it? 

1713 data = found.read() 

1714 self.assertGreater(len(data), 0) 

1715 

1716 # Outside the context manager the file should no longer exist.

1717 self.assertFalse(found.exists()) 

1718 

1719 # File count should not have changed. 

1720 self.assertEqual(cache_manager.file_count, threshold + 1) 

1721 

1722 # Dataset 2 was protected by the exempt directory, but because

1723 # hardlinks are used its entry in the main cache was deleted during the

1724 # expiry above, so it should no longer be found.

1725 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1726 self.assertIsNone(found) 

1727 

1728 # And the one stored after it is also gone. 

1729 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1730 self.assertIsNone(found) 

1731 

1732 # But dataset 4 is present. 

1733 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1734 self.assertIsNotNone(found) 

1735 

1736 # Adding a new dataset to the cache should now expire the oldest entry.

1737 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1738 

1739 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1740 self.assertIsNone(found) 

1741 
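    # The in-use protection above relies on ordinary hardlink semantics:
    # file data survives until its last directory entry is removed. A
    # standalone illustration of that mechanism, independent of the cache
    # manager (the file names here are arbitrary):
    @staticmethod
    def _hardlink_semantics_demo() -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            main = os.path.join(tmpdir, "cache_entry")
            exempt = os.path.join(tmpdir, "in_use")
            with open(main, "w") as f:
                f.write("payload")
            os.link(main, exempt)  # second name for the same inode
            os.unlink(main)  # "expire" the main cache entry
            with open(exempt) as f:
                assert f.read() == "payload"  # still readable via the link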

1742 def testCacheExpiryDatasets(self) -> None: 

1743 threshold = 2 # Keep 2 datasets. 

1744 mode = "datasets" 

1745 config_str = self._expiration_config(mode, threshold) 

1746 

1747 cache_manager = self._make_cache_manager(config_str) 

1748 self.assertExpiration(cache_manager, 5, threshold + 1) 

1749 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1750 

1751 def testCacheExpiryDatasetsComposite(self) -> None: 

1752 threshold = 2 # Keep 2 datasets. 

1753 mode = "datasets" 

1754 config_str = self._expiration_config(mode, threshold) 

1755 

1756 cache_manager = self._make_cache_manager(config_str) 

1757 

1758 n_datasets = 3 

1759 for i in range(n_datasets): 

1760 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True): 

1761 cached = cache_manager.move_to_cache(component_file, component_ref) 

1762 self.assertIsNotNone(cached) 

1763 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1764 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1765 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1766 

1767 self.assertEqual(cache_manager.file_count, 6) # 2 retained datasets of 3 files each

1768 

1769 # Write two new non-composite datasets and the number of files should drop.

1770 self.assertExpiration(cache_manager, 2, 5) 

1771 

1772 def testCacheExpirySize(self) -> None: 

1773 threshold = 55 # Each file is 10 bytes 

1774 mode = "size" 

1775 config_str = self._expiration_config(mode, threshold) 

1776 

1777 cache_manager = self._make_cache_manager(config_str) 

1778 self.assertExpiration(cache_manager, 10, 6) 

1779 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1780 
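    # Worked arithmetic for the size mode above: 10 files of 10 bytes each
    # are inserted against a 55-byte threshold. The retained count of 6 is
    # consistent with expiry trimming existing entries to the threshold
    # (5 files, 50 bytes) around each insertion, after which the newly
    # cached file brings the total to 6 files (60 bytes). The exact trigger
    # point is inferred from the asserted counts, not from the
    # implementation.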

1781 def assertExpiration( 

1782 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1783 ) -> None: 

1784 """Insert the datasets and then check the number retained.""" 

1785 for i in range(n_datasets): 

1786 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1787 self.assertIsNotNone(cached) 

1788 

1789 self.assertEqual(cache_manager.file_count, n_retained) 

1790 

1791 # The oldest files should no longer be in the cache.

1792 for i in range(n_datasets): 

1793 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1794 if i >= n_datasets - n_retained: 

1795 self.assertIsInstance(found, ResourcePath) 

1796 else: 

1797 self.assertIsNone(found) 

1798 

1799 def testCacheExpiryAge(self) -> None: 

1800 threshold = 1 # Expire files older than 1 second.

1801 mode = "age" 

1802 config_str = self._expiration_config(mode, threshold) 

1803 

1804 cache_manager = self._make_cache_manager(config_str) 

1805 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1806 

1807 # Insert 2 files, then sleep, then insert 4 more.

1808 for i in range(2): 

1809 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1810 self.assertIsNotNone(cached) 

1811 time.sleep(2.0) 

1812 for j in range(4): 

1813 i = 2 + j # Continue numbering from the first batch

1814 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1815 self.assertIsNotNone(cached) 

1816 

1817 # Only the files written after the sleep should exist. 

1818 self.assertEqual(cache_manager.file_count, 4) 

1819 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1820 self.assertIsNone(found) 

1821 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1822 self.assertIsInstance(found, ResourcePath) 

1823 

1824 

1825class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): 

1826 """Test the null datastore.""" 

1827 

1828 storageClassFactory = StorageClassFactory() 

1829 

1830 def test_basics(self) -> None: 

1831 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1832 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1833 

1834 null = NullDatastore(None, None) 

1835 

1836 self.assertFalse(null.exists(ref)) 

1837 self.assertFalse(null.knows(ref)) 

1838 knows = null.knows_these([ref]) 

1839 self.assertFalse(knows[ref]) 

1840 null.validateConfiguration(ref) 

1841 

1842 with self.assertRaises(FileNotFoundError): 

1843 null.get(ref) 

1844 with self.assertRaises(NotImplementedError): 

1845 null.put("", ref) 

1846 with self.assertRaises(FileNotFoundError): 

1847 null.getURI(ref) 

1848 with self.assertRaises(FileNotFoundError): 

1849 null.getURIs(ref) 

1850 with self.assertRaises(FileNotFoundError): 

1851 null.getManyURIs([ref]) 

1852 with self.assertRaises(NotImplementedError): 

1853 null.getLookupKeys() 

1854 with self.assertRaises(NotImplementedError): 

1855 null.import_records({}) 

1856 with self.assertRaises(NotImplementedError): 

1857 null.export_records([]) 

1858 with self.assertRaises(NotImplementedError): 

1859 null.export([ref]) 

1860 with self.assertRaises(NotImplementedError): 

1861 null.transfer(null, ref) 

1862 with self.assertRaises(NotImplementedError): 

1863 null.emptyTrash() 

1864 with self.assertRaises(NotImplementedError): 

1865 null.trash(ref) 

1866 with self.assertRaises(NotImplementedError): 

1867 null.forget([ref]) 

1868 with self.assertRaises(NotImplementedError): 

1869 null.remove(ref) 

1870 with self.assertRaises(NotImplementedError): 

1871 null.retrieveArtifacts([ref], ResourcePath(".")) 

1872 with self.assertRaises(NotImplementedError): 

1873 null.transfer_from(null, [ref]) 

1874 with self.assertRaises(NotImplementedError): 

1875 null.ingest() 

1876 
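# A hedged usage sketch: NullDatastore is a null object, so read paths can
# treat "no datastore configured" uniformly. It relies only on behavior
# asserted above (get() raising FileNotFoundError); the helper itself is
# hypothetical and not part of the API under test.
def _get_or_default(datastore: Datastore, ref: DatasetRef, default: Any = None) -> Any:
    """Return the dataset if the datastore can serve it, else ``default``."""
    try:
        return datastore.get(ref)
    except FileNotFoundError:
        return default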

1877 

1878class DatasetRefURIsTestCase(unittest.TestCase): 

1879 """Tests for DatasetRefURIs.""" 

1880 

1881 def testSequenceAccess(self) -> None: 

1882 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1883 uris = DatasetRefURIs() 

1884 

1885 self.assertEqual(len(uris), 2) 

1886 self.assertEqual(uris[0], None) 

1887 self.assertEqual(uris[1], {}) 

1888 

1889 primaryURI = ResourcePath("1/2/3") 

1890 componentURI = ResourcePath("a/b/c") 

1891 

1892 # Affirm that DatasetRefURIs does not support MutableSequence functions.

1893 with self.assertRaises(TypeError): 

1894 uris[0] = primaryURI 

1895 with self.assertRaises(TypeError): 

1896 uris[1] = {"foo": componentURI} 

1897 

1898 # But DatasetRefURIs can be set by property name:

1899 uris.primaryURI = primaryURI 

1900 uris.componentURIs = {"foo": componentURI} 

1901 self.assertEqual(uris.primaryURI, primaryURI) 

1902 self.assertEqual(uris[0], primaryURI) 

1903 

1904 primary, components = uris 

1905 self.assertEqual(primary, primaryURI) 

1906 self.assertEqual(components, {"foo": componentURI}) 

1907 
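    # A usage sketch grounded in the tuple behavior verified above:
    # Datastore.getURIs returns a DatasetRefURIs, so call sites written for
    # a plain (primary, components) pair keep working. This helper is
    # illustrative only.
    @staticmethod
    def _describe_uris_sketch(datastore: Datastore, ref: DatasetRef) -> list[str]:
        primary, components = datastore.getURIs(ref)  # tuple-style unpacking
        described = [] if primary is None else [f"primary: {primary}"]
        described += [f"{name}: {uri}" for name, uri in components.items()]
        return described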

1908 def testRepr(self) -> None: 

1909 """Verify __repr__ output.""" 

1910 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1911 self.assertEqual( 

1912 repr(uris), 

1913 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1914 ) 

1915 

1916 

1917class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1918 """Test the StoredFileInfo class.""" 

1919 

1920 storageClassFactory = StorageClassFactory() 

1921 

1922 def test_StoredFileInfo(self) -> None: 

1923 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1924 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1925 

1926 record = dict( 

1927 storage_class="StructuredDataDict", 

1928 formatter="lsst.daf.butler.Formatter", 

1929 path="a/b/c.txt", 

1930 component="component", 

1931 checksum=None, 

1932 file_size=5, 

1933 ) 

1934 info = StoredFileInfo.from_record(record) 

1935 

1936 self.assertEqual(info.to_record(), record) 

1937 

1938 ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1939 rebased = info.rebase(ref2) 

1940 self.assertEqual(rebased.rebase(ref), info) 

1941 

1942 with self.assertRaises(TypeError): 

1943 rebased.update(formatter=42) 

1944 

1945 with self.assertRaises(ValueError): 

1946 rebased.update(something=42, new="42") 

1947 

1948 # Check that pickle works on StoredFileInfo. 

1949 pickled_info = pickle.dumps(info) 

1950 unpickled_info = pickle.loads(pickled_info) 

1951 self.assertEqual(unpickled_info, info) 

1952 

1953 
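# The record round trip verified above is what allows StoredFileInfo to be
# persisted as plain column values. A minimal sketch of that pattern; the
# helper is illustrative, and the equality it relies on is exactly what
# test_StoredFileInfo asserts.
def _roundtrip_file_info(info: StoredFileInfo) -> StoredFileInfo:
    """Serialize to a record dict and rebuild an equal instance."""
    return StoredFileInfo.from_record(info.to_record())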

1954if __name__ == "__main__": 

1955 unittest.main()