Coverage for tests/test_datastore.py: 11%

1092 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-13 10:57 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30import contextlib 

31import os 

32import pickle 

33import shutil 

34import tempfile 

35import time 

36import unittest 

37import unittest.mock 

38import uuid 

39from collections.abc import Callable 

40from typing import Any, Iterator, cast 

41 

42import lsst.utils.tests 

43import yaml 

44from lsst.daf.butler import ( 

45 Config, 

46 DataCoordinate, 

47 DatasetIdGenEnum, 

48 DatasetRef, 

49 DatasetType, 

50 DatasetTypeNotSupportedError, 

51 Datastore, 

52 DimensionUniverse, 

53 FileDataset, 

54 StorageClass, 

55 StorageClassFactory, 

56) 

57from lsst.daf.butler.datastore import DatasetRefURIs, DatastoreConfig, DatastoreValidationError, NullDatastore 

58from lsst.daf.butler.datastore.cache_manager import ( 

59 DatastoreCacheManager, 

60 DatastoreCacheManagerConfig, 

61 DatastoreDisabledCacheManager, 

62) 

63from lsst.daf.butler.datastore.stored_file_info import StoredFileInfo 

64from lsst.daf.butler.formatters.yaml import YamlFormatter 

65from lsst.daf.butler.tests import ( 

66 BadNoWriteFormatter, 

67 BadWriteFormatter, 

68 DatasetTestHelper, 

69 DatastoreTestHelper, 

70 DummyRegistry, 

71 MetricsExample, 

72 MetricsExampleDataclass, 

73 MetricsExampleModel, 

74) 

75from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel 

76from lsst.daf.butler.tests.utils import TestCaseMixin 

77from lsst.resources import ResourcePath 

78from lsst.utils import doImport 

79 

80TESTDIR = os.path.dirname(__file__) 

81 

82 

83def makeExampleMetrics(use_none: bool = False) -> MetricsExample: 

84 """Make example dataset that can be stored in butler.""" 

85 if use_none: 

86 array = None 

87 else: 

88 array = [563, 234, 456.7, 105, 2054, -1045] 

89 return MetricsExample( 

90 {"AM1": 5.2, "AM2": 30.6}, 

91 {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}}, 

92 array, 

93 ) 

94 

95 

96class TransactionTestError(Exception): 

97 """Specific error for transactions, to prevent misdiagnosing 

98 that might otherwise occur when a standard exception is used. 

99 """ 

100 

101 pass 

102 

103 

104class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper, TestCaseMixin): 

105 """Support routines for datastore testing""" 

106 

107 root: str | None = None 

108 universe: DimensionUniverse 

109 storageClassFactory: StorageClassFactory 

110 

111 @classmethod 

112 def setUpClass(cls) -> None: 

113 # Storage Classes are fixed for all datastores in these tests 

114 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

115 cls.storageClassFactory = StorageClassFactory() 

116 cls.storageClassFactory.addFromConfig(scConfigFile) 

117 

118 # Read the Datastore config so we can get the class 

119 # information (since we should not assume the constructor 

120 # name here, but rely on the configuration file itself) 

121 datastoreConfig = DatastoreConfig(cls.configFile) 

122 cls.datastoreType = cast(type[Datastore], doImport(datastoreConfig["cls"])) 

123 cls.universe = DimensionUniverse() 

124 

125 def setUp(self) -> None: 

126 self.setUpDatastoreTests(DummyRegistry, DatastoreConfig) 

127 

128 def tearDown(self) -> None: 

129 if self.root is not None and os.path.exists(self.root): 

130 shutil.rmtree(self.root, ignore_errors=True) 

131 

132 

133class DatastoreTests(DatastoreTestsBase): 

134 """Some basic tests of a simple datastore.""" 

135 

136 hasUnsupportedPut = True 

137 rootKeys: tuple[str, ...] | None = None 

138 isEphemeral: bool = False 

139 validationCanFail: bool = False 

140 

141 def testConfigRoot(self) -> None: 

142 full = DatastoreConfig(self.configFile) 

143 config = DatastoreConfig(self.configFile, mergeDefaults=False) 

144 newroot = "/random/location" 

145 self.datastoreType.setConfigRoot(newroot, config, full) 

146 if self.rootKeys: 

147 for k in self.rootKeys: 

148 self.assertIn(newroot, config[k]) 

149 

150 def testConstructor(self) -> None: 

151 datastore = self.makeDatastore() 

152 self.assertIsNotNone(datastore) 

153 self.assertIs(datastore.isEphemeral, self.isEphemeral) 

154 

155 def testConfigurationValidation(self) -> None: 

156 datastore = self.makeDatastore() 

157 sc = self.storageClassFactory.getStorageClass("ThingOne") 

158 datastore.validateConfiguration([sc]) 

159 

160 sc2 = self.storageClassFactory.getStorageClass("ThingTwo") 

161 if self.validationCanFail: 

162 with self.assertRaises(DatastoreValidationError): 

163 datastore.validateConfiguration([sc2], logFailures=True) 

164 

165 dimensions = self.universe.conform(("visit", "physical_filter")) 

166 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

167 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

168 datastore.validateConfiguration([ref]) 

169 

170 def testParameterValidation(self) -> None: 

171 """Check that parameters are validated""" 

172 sc = self.storageClassFactory.getStorageClass("ThingOne") 

173 dimensions = self.universe.conform(("visit", "physical_filter")) 

174 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

175 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

176 datastore = self.makeDatastore() 

177 data = {1: 2, 3: 4} 

178 datastore.put(data, ref) 

179 newdata = datastore.get(ref) 

180 self.assertEqual(data, newdata) 

181 with self.assertRaises(KeyError): 

182 newdata = datastore.get(ref, parameters={"missing": 5}) 

183 

184 def testBasicPutGet(self) -> None: 

185 metrics = makeExampleMetrics() 

186 datastore = self.makeDatastore() 

187 

188 # Create multiple storage classes for testing different formulations 

189 storageClasses = [ 

190 self.storageClassFactory.getStorageClass(sc) 

191 for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle") 

192 ] 

193 

194 dimensions = self.universe.conform(("visit", "physical_filter")) 

195 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

196 dataId2 = {"instrument": "dummy", "visit": 53, "physical_filter": "V", "band": "v"} 

197 

198 for sc in storageClasses: 

199 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

200 ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2) 

201 

202 # Make sure that using getManyURIs without predicting before the 

203 # dataset has been put raises. 

204 with self.assertRaises(FileNotFoundError): 

205 datastore.getManyURIs([ref], predict=False) 

206 

207 # Make sure that using getManyURIs with predicting before the 

208 # dataset has been put predicts the URI. 

209 uris = datastore.getManyURIs([ref, ref2], predict=True) 

210 self.assertIn("52", uris[ref].primaryURI.geturl()) 

211 self.assertIn("#predicted", uris[ref].primaryURI.geturl()) 

212 self.assertIn("53", uris[ref2].primaryURI.geturl()) 

213 self.assertIn("#predicted", uris[ref2].primaryURI.geturl()) 

214 

215 datastore.put(metrics, ref) 

216 

217 # Does it exist? 

218 self.assertTrue(datastore.exists(ref)) 

219 self.assertTrue(datastore.knows(ref)) 

220 multi = datastore.knows_these([ref]) 

221 self.assertTrue(multi[ref]) 

222 multi = datastore.mexists([ref, ref2]) 

223 self.assertTrue(multi[ref]) 

224 self.assertFalse(multi[ref2]) 

225 

226 # Get 

227 metricsOut = datastore.get(ref, parameters=None) 

228 self.assertEqual(metrics, metricsOut) 

229 

230 uri = datastore.getURI(ref) 

231 self.assertEqual(uri.scheme, self.uriScheme) 

232 

233 uris = datastore.getManyURIs([ref]) 

234 self.assertEqual(len(uris), 1) 

235 ref, uri = uris.popitem() 

236 self.assertTrue(uri.primaryURI.exists()) 

237 self.assertFalse(uri.componentURIs) 

238 

239 # Get a component -- we need to construct new refs for them 

240 # with derived storage classes but with parent ID 

241 for comp in ("data", "output"): 

242 compRef = ref.makeComponentRef(comp) 

243 output = datastore.get(compRef) 

244 self.assertEqual(output, getattr(metricsOut, comp)) 

245 

246 uri = datastore.getURI(compRef) 

247 self.assertEqual(uri.scheme, self.uriScheme) 

248 

249 uris = datastore.getManyURIs([compRef]) 

250 self.assertEqual(len(uris), 1) 

251 

252 storageClass = sc 

253 

254 # Check that we can put a metric with None in a component and 

255 # get it back as None 

256 metricsNone = makeExampleMetrics(use_none=True) 

257 dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V", "band": "v"} 

258 refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone) 

259 datastore.put(metricsNone, refNone) 

260 

261 comp = "data" 

262 for comp in ("data", "output"): 

263 compRef = refNone.makeComponentRef(comp) 

264 output = datastore.get(compRef) 

265 self.assertEqual(output, getattr(metricsNone, comp)) 

266 

267 # Check that a put fails if the dataset type is not supported 

268 if self.hasUnsupportedPut: 

269 sc = StorageClass("UnsupportedSC", pytype=type(metrics)) 

270 ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId) 

271 with self.assertRaises(DatasetTypeNotSupportedError): 

272 datastore.put(metrics, ref) 

273 

274 # These should raise 

275 ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId) 

276 with self.assertRaises(FileNotFoundError): 

277 # non-existing file 

278 datastore.get(ref) 

279 

280 # Get a URI from it 

281 uri = datastore.getURI(ref, predict=True) 

282 self.assertEqual(uri.scheme, self.uriScheme) 

283 

284 with self.assertRaises(FileNotFoundError): 

285 datastore.getURI(ref) 

286 

287 def testTrustGetRequest(self) -> None: 

288 """Check that we can get datasets that registry knows nothing about.""" 

289 datastore = self.makeDatastore() 

290 

291 # Skip test if the attribute is not defined 

292 if not hasattr(datastore, "trustGetRequest"): 

293 return 

294 

295 metrics = makeExampleMetrics() 

296 

297 i = 0 

298 for sc_name in ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"): 

299 i += 1 

300 datasetTypeName = f"test_metric{i}" # Different dataset type name each time. 

301 

302 if sc_name == "StructuredComposite": 

303 disassembled = True 

304 else: 

305 disassembled = False 

306 

307 # Start datastore in default configuration of using registry 

308 datastore.trustGetRequest = False 

309 

310 # Create multiple storage classes for testing with or without 

311 # disassembly 

312 sc = self.storageClassFactory.getStorageClass(sc_name) 

313 dimensions = self.universe.conform(("visit", "physical_filter")) 

314 

315 dataId = {"instrument": "dummy", "visit": 52 + i, "physical_filter": "V", "band": "v"} 

316 

317 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId) 

318 datastore.put(metrics, ref) 

319 

320 # Does it exist? 

321 self.assertTrue(datastore.exists(ref)) 

322 self.assertTrue(datastore.knows(ref)) 

323 multi = datastore.knows_these([ref]) 

324 self.assertTrue(multi[ref]) 

325 multi = datastore.mexists([ref]) 

326 self.assertTrue(multi[ref]) 

327 

328 # Get 

329 metricsOut = datastore.get(ref) 

330 self.assertEqual(metrics, metricsOut) 

331 

332 # Get the URI(s) 

333 primaryURI, componentURIs = datastore.getURIs(ref) 

334 if disassembled: 

335 self.assertIsNone(primaryURI) 

336 self.assertEqual(len(componentURIs), 3) 

337 else: 

338 self.assertIn(datasetTypeName, primaryURI.path) 

339 self.assertFalse(componentURIs) 

340 

341 # Delete registry entry so now we are trusting 

342 datastore.removeStoredItemInfo(ref) 

343 

344 # Now stop trusting and check that things break 

345 datastore.trustGetRequest = False 

346 

347 # Does it exist? 

348 self.assertFalse(datastore.exists(ref)) 

349 self.assertFalse(datastore.knows(ref)) 

350 multi = datastore.knows_these([ref]) 

351 self.assertFalse(multi[ref]) 

352 multi = datastore.mexists([ref]) 

353 self.assertFalse(multi[ref]) 

354 

355 with self.assertRaises(FileNotFoundError): 

356 datastore.get(ref) 

357 

358 if sc_name != "StructuredDataNoComponents": 

359 with self.assertRaises(FileNotFoundError): 

360 datastore.get(ref.makeComponentRef("data")) 

361 

362 # URI should fail unless we ask for prediction 

363 with self.assertRaises(FileNotFoundError): 

364 datastore.getURIs(ref) 

365 

366 predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True) 

367 if disassembled: 

368 self.assertIsNone(predicted_primary) 

369 self.assertEqual(len(predicted_disassembled), 3) 

370 for uri in predicted_disassembled.values(): 

371 self.assertEqual(uri.fragment, "predicted") 

372 self.assertIn(datasetTypeName, uri.path) 

373 else: 

374 self.assertIn(datasetTypeName, predicted_primary.path) 

375 self.assertFalse(predicted_disassembled) 

376 self.assertEqual(predicted_primary.fragment, "predicted") 

377 

378 # Now enable registry-free trusting mode 

379 datastore.trustGetRequest = True 

380 

381 # Try again to get it 

382 metricsOut = datastore.get(ref) 

383 self.assertEqual(metricsOut, metrics) 

384 

385 # Does it exist? 

386 self.assertTrue(datastore.exists(ref)) 

387 

388 # Get a component 

389 if sc_name != "StructuredDataNoComponents": 

390 comp = "data" 

391 compRef = ref.makeComponentRef(comp) 

392 output = datastore.get(compRef) 

393 self.assertEqual(output, getattr(metrics, comp)) 

394 

395 # Get the URI -- if we trust this should work even without 

396 # enabling prediction. 

397 primaryURI2, componentURIs2 = datastore.getURIs(ref) 

398 self.assertEqual(primaryURI2, primaryURI) 

399 self.assertEqual(componentURIs2, componentURIs) 

400 

401 # Check for compatible storage class. 

402 if sc_name in ("StructuredDataNoComponents", "StructuredData"): 

403 # Make new dataset ref with compatible storage class. 

404 ref_comp = ref.overrideStorageClass("StructuredDataDictJson") 

405 

406 # Without `set_retrieve_dataset_type_method` it will fail to 

407 # find correct file. 

408 self.assertFalse(datastore.exists(ref_comp)) 

409 with self.assertRaises(FileNotFoundError): 

410 datastore.get(ref_comp) 

411 with self.assertRaises(FileNotFoundError): 

412 datastore.get(ref, storageClass="StructuredDataDictJson") 

413 

414 # Need a special method to generate stored dataset type. 

415 def _stored_dataset_type(name: str, ref: DatasetRef = ref) -> DatasetType: 

416 if name == ref.datasetType.name: 

417 return ref.datasetType 

418 raise ValueError(f"Unexpected dataset type name {ref.datasetType.name}") 

419 

420 datastore.set_retrieve_dataset_type_method(_stored_dataset_type) 

421 

422 # Storage class override with original dataset ref. 

423 metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson") 

424 self.assertIsInstance(metrics_as_dict, dict) 

425 

426 # get() should return a dict now. 

427 metrics_as_dict = datastore.get(ref_comp) 

428 self.assertIsInstance(metrics_as_dict, dict) 

429 

430 # exists() should work as well. 

431 self.assertTrue(datastore.exists(ref_comp)) 

432 

433 datastore.set_retrieve_dataset_type_method(None) 

434 

435 def testDisassembly(self) -> None: 

436 """Test disassembly within datastore.""" 

437 metrics = makeExampleMetrics() 

438 if self.isEphemeral: 

439 # in-memory datastore does not disassemble 

440 return 

441 

442 # Create multiple storage classes for testing different formulations 

443 # of composites. One of these will not disassemble to provide 

444 # a reference. 

445 storageClasses = [ 

446 self.storageClassFactory.getStorageClass(sc) 

447 for sc in ( 

448 "StructuredComposite", 

449 "StructuredCompositeTestA", 

450 "StructuredCompositeTestB", 

451 "StructuredCompositeReadComp", 

452 "StructuredData", # No disassembly 

453 "StructuredCompositeReadCompNoDisassembly", 

454 ) 

455 ] 

456 

457 # Create the test datastore 

458 datastore = self.makeDatastore() 

459 

460 # Dummy dataId 

461 dimensions = self.universe.conform(("visit", "physical_filter")) 

462 dataId = {"instrument": "dummy", "visit": 428, "physical_filter": "R"} 

463 

464 for i, sc in enumerate(storageClasses): 

465 with self.subTest(storageClass=sc.name): 

466 # Create a different dataset type each time round 

467 # so that a test failure in this subtest does not trigger 

468 # a cascade of tests because of file clashes 

469 ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId) 

470 

471 disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"} 

472 

473 datastore.put(metrics, ref) 

474 

475 baseURI, compURIs = datastore.getURIs(ref) 

476 if disassembled: 

477 self.assertIsNone(baseURI) 

478 self.assertEqual(set(compURIs), {"data", "output", "summary"}) 

479 else: 

480 self.assertIsNotNone(baseURI) 

481 self.assertEqual(compURIs, {}) 

482 

483 metrics_get = datastore.get(ref) 

484 self.assertEqual(metrics_get, metrics) 

485 

486 # Retrieve the composite with read parameter 

487 stop = 4 

488 metrics_get = datastore.get(ref, parameters={"slice": slice(stop)}) 

489 self.assertEqual(metrics_get.summary, metrics.summary) 

490 self.assertEqual(metrics_get.output, metrics.output) 

491 self.assertEqual(metrics_get.data, metrics.data[:stop]) 

492 

493 # Retrieve a component 

494 data = datastore.get(ref.makeComponentRef("data")) 

495 self.assertEqual(data, metrics.data) 

496 

497 # On supported storage classes attempt to access a read 

498 # only component 

499 if "ReadComp" in sc.name: 

500 cRef = ref.makeComponentRef("counter") 

501 counter = datastore.get(cRef) 

502 self.assertEqual(counter, len(metrics.data)) 

503 

504 counter = datastore.get(cRef, parameters={"slice": slice(stop)}) 

505 self.assertEqual(counter, stop) 

506 

507 datastore.remove(ref) 

508 

509 def prepDeleteTest(self, n_refs: int = 1) -> tuple[Datastore, tuple[DatasetRef, ...]]: 

510 metrics = makeExampleMetrics() 

511 datastore = self.makeDatastore() 

512 # Put 

513 dimensions = self.universe.conform(("visit", "physical_filter")) 

514 sc = self.storageClassFactory.getStorageClass("StructuredData") 

515 refs = [] 

516 for i in range(n_refs): 

517 dataId = {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U", "band": "u"} 

518 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

519 datastore.put(metrics, ref) 

520 

521 # Does it exist? 

522 self.assertTrue(datastore.exists(ref)) 

523 

524 # Get 

525 metricsOut = datastore.get(ref) 

526 self.assertEqual(metrics, metricsOut) 

527 refs.append(ref) 

528 

529 return datastore, *refs 

530 

531 def testRemove(self) -> None: 

532 datastore, ref = self.prepDeleteTest() 

533 

534 # Remove 

535 datastore.remove(ref) 

536 

537 # Does it exist? 

538 self.assertFalse(datastore.exists(ref)) 

539 

540 # Do we now get a predicted URI? 

541 uri = datastore.getURI(ref, predict=True) 

542 self.assertEqual(uri.fragment, "predicted") 

543 

544 # Get should now fail 

545 with self.assertRaises(FileNotFoundError): 

546 datastore.get(ref) 

547 # Can only delete once 

548 with self.assertRaises(FileNotFoundError): 

549 datastore.remove(ref) 

550 

551 def testForget(self) -> None: 

552 datastore, ref = self.prepDeleteTest() 

553 

554 # Remove 

555 datastore.forget([ref]) 

556 

557 # Does it exist (as far as we know)? 

558 self.assertFalse(datastore.exists(ref)) 

559 

560 # Do we now get a predicted URI? 

561 uri = datastore.getURI(ref, predict=True) 

562 self.assertEqual(uri.fragment, "predicted") 

563 

564 # Get should now fail 

565 with self.assertRaises(FileNotFoundError): 

566 datastore.get(ref) 

567 

568 # Forgetting again is a silent no-op 

569 datastore.forget([ref]) 

570 

571 # Predicted URI should still point to the file. 

572 self.assertTrue(uri.exists()) 

573 

574 def testTransfer(self) -> None: 

575 metrics = makeExampleMetrics() 

576 

577 dimensions = self.universe.conform(("visit", "physical_filter")) 

578 dataId = {"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime", "band": "u"} 

579 

580 sc = self.storageClassFactory.getStorageClass("StructuredData") 

581 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

582 

583 inputDatastore = self.makeDatastore("test_input_datastore") 

584 outputDatastore = self.makeDatastore("test_output_datastore") 

585 

586 inputDatastore.put(metrics, ref) 

587 outputDatastore.transfer(inputDatastore, ref) 

588 

589 metricsOut = outputDatastore.get(ref) 

590 self.assertEqual(metrics, metricsOut) 

591 

592 def testBasicTransaction(self) -> None: 

593 datastore = self.makeDatastore() 

594 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

595 dimensions = self.universe.conform(("visit", "physical_filter")) 

596 nDatasets = 6 

597 dataIds = [ 

598 {"instrument": "dummy", "visit": i, "physical_filter": "V", "band": "v"} for i in range(nDatasets) 

599 ] 

600 data = [ 

601 ( 

602 self.makeDatasetRef("metric", dimensions, storageClass, dataId), 

603 makeExampleMetrics(), 

604 ) 

605 for dataId in dataIds 

606 ] 

607 succeed = data[: nDatasets // 2] 

608 fail = data[nDatasets // 2 :] 

609 # All datasets added in this transaction should continue to exist 

610 with datastore.transaction(): 

611 for ref, metrics in succeed: 

612 datastore.put(metrics, ref) 

613 # Whereas datasets added in this transaction should not 

614 with self.assertRaises(TransactionTestError): 

615 with datastore.transaction(): 

616 for ref, metrics in fail: 

617 datastore.put(metrics, ref) 

618 raise TransactionTestError("This should propagate out of the context manager") 

619 # Check for datasets that should exist 

620 for ref, metrics in succeed: 

621 # Does it exist? 

622 self.assertTrue(datastore.exists(ref)) 

623 # Get 

624 metricsOut = datastore.get(ref, parameters=None) 

625 self.assertEqual(metrics, metricsOut) 

626 # URI 

627 uri = datastore.getURI(ref) 

628 self.assertEqual(uri.scheme, self.uriScheme) 

629 # Check for datasets that should not exist 

630 for ref, _ in fail: 

631 # These should raise 

632 with self.assertRaises(FileNotFoundError): 

633 # non-existing file 

634 datastore.get(ref) 

635 with self.assertRaises(FileNotFoundError): 

636 datastore.getURI(ref) 

637 

638 def testNestedTransaction(self) -> None: 

639 datastore = self.makeDatastore() 

640 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

641 dimensions = self.universe.conform(("visit", "physical_filter")) 

642 metrics = makeExampleMetrics() 

643 

644 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} 

645 refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

646 datastore.put(metrics, refBefore) 

647 with self.assertRaises(TransactionTestError): 

648 with datastore.transaction(): 

649 dataId = {"instrument": "dummy", "visit": 1, "physical_filter": "V", "band": "v"} 

650 refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

651 datastore.put(metrics, refOuter) 

652 with datastore.transaction(): 

653 dataId = {"instrument": "dummy", "visit": 2, "physical_filter": "V", "band": "v"} 

654 refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

655 datastore.put(metrics, refInner) 

656 # All datasets should exist 

657 for ref in (refBefore, refOuter, refInner): 

658 metricsOut = datastore.get(ref, parameters=None) 

659 self.assertEqual(metrics, metricsOut) 

660 raise TransactionTestError("This should roll back the transaction") 

661 # Dataset(s) inserted before the transaction should still exist 

662 metricsOut = datastore.get(refBefore, parameters=None) 

663 self.assertEqual(metrics, metricsOut) 

664 # But all datasets inserted during the (rolled back) transaction 

665 # should be gone 

666 with self.assertRaises(FileNotFoundError): 

667 datastore.get(refOuter) 

668 with self.assertRaises(FileNotFoundError): 

669 datastore.get(refInner) 

670 

671 def _prepareIngestTest(self) -> tuple[MetricsExample, DatasetRef]: 

672 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

673 dimensions = self.universe.conform(("visit", "physical_filter")) 

674 metrics = makeExampleMetrics() 

675 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} 

676 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

677 return metrics, ref 

678 

679 def runIngestTest(self, func: Callable[[MetricsExample, str, DatasetRef], None]) -> None: 

680 metrics, ref = self._prepareIngestTest() 

681 # The file will be deleted after the test. 

682 # For symlink tests this leads to a situation where the datastore 

683 # points to a file that does not exist. This will make os.path.exist 

684 # return False but then the new symlink will fail with 

685 # FileExistsError later in the code so the test still passes. 

686 with _temp_yaml_file(metrics._asdict()) as path: 

687 func(metrics, path, ref) 

688 

689 def testIngestNoTransfer(self) -> None: 

690 """Test ingesting existing files with no transfer.""" 

691 for mode in (None, "auto"): 

692 # Some datastores have auto but can't do in place transfer 

693 if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto: 

694 continue 

695 

696 with self.subTest(mode=mode): 

697 datastore = self.makeDatastore() 

698 

699 def succeed( 

700 obj: MetricsExample, 

701 path: str, 

702 ref: DatasetRef, 

703 mode: str | None = mode, 

704 datastore: Datastore = datastore, 

705 ) -> None: 

706 """Ingest a file already in the datastore root.""" 

707 # first move it into the root, and adjust the path 

708 # accordingly 

709 path = shutil.copy(path, datastore.root.ospath) 

710 path = os.path.relpath(path, start=datastore.root.ospath) 

711 datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode) 

712 self.assertEqual(obj, datastore.get(ref)) 

713 

714 def failInputDoesNotExist( 

715 obj: MetricsExample, 

716 path: str, 

717 ref: DatasetRef, 

718 mode: str | None = mode, 

719 datastore: Datastore = datastore, 

720 ) -> None: 

721 """Can't ingest files if we're given a bad path.""" 

722 with self.assertRaises(FileNotFoundError): 

723 datastore.ingest( 

724 FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode 

725 ) 

726 self.assertFalse(datastore.exists(ref)) 

727 

728 def failOutsideRoot( 

729 obj: MetricsExample, 

730 path: str, 

731 ref: DatasetRef, 

732 mode: str | None = mode, 

733 datastore: Datastore = datastore, 

734 ) -> None: 

735 """Can't ingest files outside of datastore root unless 

736 auto. 

737 """ 

738 if mode == "auto": 

739 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

740 self.assertTrue(datastore.exists(ref)) 

741 else: 

742 with self.assertRaises(RuntimeError): 

743 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

744 self.assertFalse(datastore.exists(ref)) 

745 

746 def failNotImplemented( 

747 obj: MetricsExample, 

748 path: str, 

749 ref: DatasetRef, 

750 mode: str | None = mode, 

751 datastore: Datastore = datastore, 

752 ) -> None: 

753 with self.assertRaises(NotImplementedError): 

754 datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode) 

755 

756 if mode in self.ingestTransferModes: 

757 self.runIngestTest(failOutsideRoot) 

758 self.runIngestTest(failInputDoesNotExist) 

759 self.runIngestTest(succeed) 

760 else: 

761 self.runIngestTest(failNotImplemented) 

762 

763 def testIngestTransfer(self) -> None: 

764 """Test ingesting existing files after transferring them.""" 

765 for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"): 

766 with self.subTest(mode=mode): 

767 datastore = self.makeDatastore(mode) 

768 

769 def succeed( 

770 obj: MetricsExample, 

771 path: str, 

772 ref: DatasetRef, 

773 mode: str | None = mode, 

774 datastore: Datastore = datastore, 

775 ) -> None: 

776 """Ingest a file by transferring it to the template 

777 location. 

778 """ 

779 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

780 self.assertEqual(obj, datastore.get(ref)) 

781 file_exists = os.path.exists(path) 

782 if mode == "move": 

783 self.assertFalse(file_exists) 

784 else: 

785 self.assertTrue(file_exists) 

786 

787 def failInputDoesNotExist( 

788 obj: MetricsExample, 

789 path: str, 

790 ref: DatasetRef, 

791 mode: str | None = mode, 

792 datastore: Datastore = datastore, 

793 ) -> None: 

794 """Can't ingest files if we're given a bad path.""" 

795 with self.assertRaises(FileNotFoundError): 

796 # Ensure the file does not look like it is in 

797 # datastore for auto mode 

798 datastore.ingest( 

799 FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode 

800 ) 

801 self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}") 

802 

803 def failNotImplemented( 

804 obj: MetricsExample, 

805 path: str, 

806 ref: DatasetRef, 

807 mode: str | None = mode, 

808 datastore: Datastore = datastore, 

809 ) -> None: 

810 with self.assertRaises(NotImplementedError): 

811 datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode) 

812 

813 if mode in self.ingestTransferModes: 

814 self.runIngestTest(failInputDoesNotExist) 

815 self.runIngestTest(succeed) 

816 else: 

817 self.runIngestTest(failNotImplemented) 

818 

819 def testIngestSymlinkOfSymlink(self) -> None: 

820 """Special test for symlink to a symlink ingest""" 

821 metrics, ref = self._prepareIngestTest() 

822 # The aim of this test is to create a dataset on disk, then 

823 # create a symlink to it and finally ingest the symlink such that 

824 # the symlink in the datastore points to the original dataset. 

825 for mode in ("symlink", "relsymlink"): 

826 if mode not in self.ingestTransferModes: 

827 continue 

828 

829 print(f"Trying mode {mode}") 

830 with _temp_yaml_file(metrics._asdict()) as realpath: 

831 with tempfile.TemporaryDirectory() as tmpdir: 

832 sympath = os.path.join(tmpdir, "symlink.yaml") 

833 os.symlink(os.path.realpath(realpath), sympath) 

834 

835 datastore = self.makeDatastore() 

836 datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode) 

837 

838 uri = datastore.getURI(ref) 

839 self.assertTrue(uri.isLocal, f"Check {uri.scheme}") 

840 self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink") 

841 

842 linkTarget = os.readlink(uri.ospath) 

843 if mode == "relsymlink": 

844 self.assertFalse(os.path.isabs(linkTarget)) 

845 else: 

846 self.assertTrue(os.path.samefile(linkTarget, realpath)) 

847 

848 # Check that we can get the dataset back regardless of mode 

849 metric2 = datastore.get(ref) 

850 self.assertEqual(metric2, metrics) 

851 

852 # Cleanup the file for next time round loop 

853 # since it will get the same file name in store 

854 datastore.remove(ref) 

855 

856 def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]: 

857 datastore = self.makeDatastore(name) 

858 

859 # For now only the FileDatastore can be used for this test. 

860 # ChainedDatastore that only includes InMemoryDatastores have to be 

861 # skipped as well. 

862 for name in datastore.names: 

863 if not name.startswith("InMemoryDatastore"): 

864 break 

865 else: 

866 raise unittest.SkipTest("in-memory datastore does not support record export/import") 

867 

868 metrics = makeExampleMetrics() 

869 dimensions = self.universe.conform(("visit", "physical_filter")) 

870 sc = self.storageClassFactory.getStorageClass("StructuredData") 

871 

872 refs = [] 

873 for visit in (2048, 2049, 2050): 

874 dataId = {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime", "band": "u"} 

875 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

876 datastore.put(metrics, ref) 

877 refs.append(ref) 

878 return datastore, refs 

879 

880 def testExportImportRecords(self) -> None: 

881 """Test for export_records and import_records methods.""" 

882 datastore, refs = self._populate_export_datastore("test_datastore") 

883 for exported_refs in (refs, refs[1:]): 

884 n_refs = len(exported_refs) 

885 records = datastore.export_records(exported_refs) 

886 self.assertGreater(len(records), 0) 

887 self.assertTrue(set(records.keys()) <= set(datastore.names)) 

888 # In a ChainedDatastore each FileDatastore will have a complete set 

889 for datastore_name in records: 

890 record_data = records[datastore_name] 

891 self.assertEqual(len(record_data.records), n_refs) 

892 

893 # Check that subsetting works, include non-existing dataset ID. 

894 dataset_ids = {exported_refs[0].id, uuid.uuid4()} 

895 subset = record_data.subset(dataset_ids) 

896 assert subset is not None 

897 self.assertEqual(len(subset.records), 1) 

898 subset = record_data.subset({uuid.uuid4()}) 

899 self.assertIsNone(subset) 

900 

901 # Use the same datastore name to import relative path. 

902 datastore2 = self.makeDatastore("test_datastore") 

903 

904 records = datastore.export_records(refs[1:]) 

905 datastore2.import_records(records) 

906 

907 with self.assertRaises(FileNotFoundError): 

908 data = datastore2.get(refs[0]) 

909 data = datastore2.get(refs[1]) 

910 self.assertIsNotNone(data) 

911 data = datastore2.get(refs[2]) 

912 self.assertIsNotNone(data) 

913 

914 def testExport(self) -> None: 

915 datastore, refs = self._populate_export_datastore("test_datastore") 

916 

917 datasets = list(datastore.export(refs)) 

918 self.assertEqual(len(datasets), 3) 

919 

920 for transfer in (None, "auto"): 

921 # Both will default to None 

922 datasets = list(datastore.export(refs, transfer=transfer)) 

923 self.assertEqual(len(datasets), 3) 

924 

925 with self.assertRaises(TypeError): 

926 list(datastore.export(refs, transfer="copy")) 

927 

928 with self.assertRaises(TypeError): 

929 list(datastore.export(refs, directory="exportDir", transfer="move")) 

930 

931 # Create a new ref that is not known to the datastore and try to 

932 # export it. 

933 sc = self.storageClassFactory.getStorageClass("ThingOne") 

934 dimensions = self.universe.conform(("visit", "physical_filter")) 

935 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

936 ref = self.makeDatasetRef("metric", dimensions, sc, dataId) 

937 with self.assertRaises(FileNotFoundError): 

938 list(datastore.export(refs + [ref], transfer=None)) 

939 

940 def test_pydantic_dict_storage_class_conversions(self) -> None: 

941 """Test converting a dataset stored as a pydantic model into a dict on 

942 read. 

943 """ 

944 datastore = self.makeDatastore() 

945 store_as_model = self.makeDatasetRef( 

946 "store_as_model", 

947 dimensions=self.universe.empty, 

948 storageClass="DictConvertibleModel", 

949 dataId=DataCoordinate.make_empty(self.universe), 

950 ) 

951 content = {"a": "one", "b": "two"} 

952 model = DictConvertibleModel.from_dict(content, extra="original content") 

953 datastore.put(model, store_as_model) 

954 retrieved_model = datastore.get(store_as_model) 

955 self.assertEqual(retrieved_model, model) 

956 loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel")) 

957 self.assertEqual(type(loaded), dict) 

958 self.assertEqual(loaded, content) 

959 

960 def test_simple_class_put_get(self) -> None: 

961 """Test that we can put and get a simple class with dict() 

962 constructor. 

963 """ 

964 datastore = self.makeDatastore() 

965 data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2}) 

966 self._assert_different_puts(datastore, "MetricsExample", data) 

967 

968 def test_dataclass_put_get(self) -> None: 

969 """Test that we can put and get a simple dataclass.""" 

970 datastore = self.makeDatastore() 

971 data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2}) 

972 self._assert_different_puts(datastore, "MetricsExampleDataclass", data) 

973 

974 def test_pydantic_put_get(self) -> None: 

975 """Test that we can put and get a simple Pydantic model.""" 

976 datastore = self.makeDatastore() 

977 data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2}) 

978 self._assert_different_puts(datastore, "MetricsExampleModel", data) 

979 

980 def test_tuple_put_get(self) -> None: 

981 """Test that we can put and get a tuple.""" 

982 datastore = self.makeDatastore() 

983 data = ("a", "b", 1) 

984 self._assert_different_puts(datastore, "TupleExample", data) 

985 

986 def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data: Any) -> None: 

987 refs = { 

988 x: self.makeDatasetRef( 

989 f"stora_as_{x}", 

990 dimensions=self.universe.empty, 

991 storageClass=f"{storageClass_root}{x}", 

992 dataId=DataCoordinate.make_empty(self.universe), 

993 ) 

994 for x in ["A", "B"] 

995 } 

996 

997 for ref in refs.values(): 

998 datastore.put(data, ref) 

999 

1000 self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"])) 

1001 

1002 

1003class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase): 

1004 """PosixDatastore specialization""" 

1005 

1006 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1007 uriScheme = "file" 

1008 canIngestNoTransferAuto = True 

1009 ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto") 

1010 isEphemeral = False 

1011 rootKeys = ("root",) 

1012 validationCanFail = True 

1013 

1014 def setUp(self) -> None: 

1015 # The call to os.path.realpath is necessary because Mac temporary files 

1016 # can end up in either /private/var/folders or /var/folders, which 

1017 # refer to the same location but don't appear to. 

1018 # This matters for "relsymlink" transfer mode, because it needs to be 

1019 # able to read the file through a relative symlink, but some of the 

1020 # intermediate directories are not traversable if you try to get from a 

1021 # tempfile in /var/folders to one in /private/var/folders via a 

1022 # relative path. 

1023 self.root = os.path.realpath(self.enterContext(tempfile.TemporaryDirectory())) 

1024 super().setUp() 

1025 

1026 def testAtomicWrite(self) -> None: 

1027 """Test that we write to a temporary and then rename""" 

1028 datastore = self.makeDatastore() 

1029 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1030 dimensions = self.universe.conform(("visit", "physical_filter")) 

1031 metrics = makeExampleMetrics() 

1032 

1033 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} 

1034 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1035 

1036 with self.assertLogs("lsst.resources", "DEBUG") as cm: 

1037 datastore.put(metrics, ref) 

1038 move_logs = [ll for ll in cm.output if "transfer=" in ll] 

1039 self.assertIn("transfer=move", move_logs[0]) 

1040 

1041 # And the transfer should be file to file. 

1042 self.assertEqual(move_logs[0].count("file://"), 2) 

1043 

1044 def testCanNotDeterminePutFormatterLocation(self) -> None: 

1045 """Verify that the expected exception is raised if the FileDatastore 

1046 can not determine the put formatter location. 

1047 """ 

1048 _ = makeExampleMetrics() 

1049 datastore = self.makeDatastore() 

1050 

1051 # Create multiple storage classes for testing different formulations 

1052 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1053 

1054 sccomp = StorageClass("Dummy") 

1055 compositeStorageClass = StorageClass( 

1056 "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp} 

1057 ) 

1058 

1059 dimensions = self.universe.conform(("visit", "physical_filter")) 

1060 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1061 

1062 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1063 compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId) 

1064 

1065 def raiser(ref: DatasetRef) -> None: 

1066 raise DatasetTypeNotSupportedError() 

1067 

1068 with unittest.mock.patch.object( 

1069 lsst.daf.butler.datastores.fileDatastore.FileDatastore, 

1070 "_determine_put_formatter_location", 

1071 side_effect=raiser, 

1072 ): 

1073 # verify the non-composite ref execution path: 

1074 with self.assertRaises(DatasetTypeNotSupportedError): 

1075 datastore.getURIs(ref, predict=True) 

1076 

1077 # verify the composite-ref execution path: 

1078 with self.assertRaises(DatasetTypeNotSupportedError): 

1079 datastore.getURIs(compRef, predict=True) 

1080 

1081 def test_roots(self): 

1082 datastore = self.makeDatastore() 

1083 

1084 self.assertEqual(set(datastore.names), set(datastore.roots.keys())) 

1085 for root in datastore.roots.values(): 

1086 if root is not None: 

1087 self.assertTrue(root.exists()) 

1088 

1089 def test_prepare_get_for_external_client(self): 

1090 datastore = self.makeDatastore() 

1091 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1092 dimensions = self.universe.conform(("visit", "physical_filter")) 

1093 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1094 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1095 with self.assertRaises(FileNotFoundError): 

1096 # Most of the coverage for this function is in test_server.py, 

1097 # because it requires a file backend that supports URL signing. 

1098 datastore.prepare_get_for_external_client(ref) 

1099 

1100 

1101class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase): 

1102 """Posix datastore tests but with checksums disabled.""" 

1103 

1104 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml") 

1105 

1106 def testChecksum(self) -> None: 

1107 """Ensure that checksums have not been calculated.""" 

1108 datastore = self.makeDatastore() 

1109 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1110 dimensions = self.universe.conform(("visit", "physical_filter")) 

1111 metrics = makeExampleMetrics() 

1112 

1113 dataId = {"instrument": "dummy", "visit": 0, "physical_filter": "V", "band": "v"} 

1114 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1115 

1116 # Configuration should have disabled checksum calculation 

1117 datastore.put(metrics, ref) 

1118 infos = datastore.getStoredItemsInfo(ref) 

1119 self.assertIsNone(infos[0].checksum) 

1120 

1121 # Remove put back but with checksums enabled explicitly 

1122 datastore.remove(ref) 

1123 datastore.useChecksum = True 

1124 datastore.put(metrics, ref) 

1125 

1126 infos = datastore.getStoredItemsInfo(ref) 

1127 self.assertIsNotNone(infos[0].checksum) 

1128 

1129 def test_repeat_ingest(self): 

1130 """Test that repeatedly ingesting the same file in direct mode 

1131 is allowed. 

1132 

1133 Test can only run with FileDatastore since that is the only one 

1134 supporting "direct" ingest. 

1135 """ 

1136 metrics, v4ref = self._prepareIngestTest() 

1137 datastore = self.makeDatastore() 

1138 v5ref = DatasetRef( 

1139 v4ref.datasetType, v4ref.dataId, v4ref.run, id_generation_mode=DatasetIdGenEnum.DATAID_TYPE_RUN 

1140 ) 

1141 

1142 with _temp_yaml_file(metrics._asdict()) as path: 

1143 datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct") 

1144 

1145 # This will fail because the ref is using UUIDv4. 

1146 with self.assertRaises(RuntimeError): 

1147 datastore.ingest(FileDataset(path=path, refs=v4ref), transfer="direct") 

1148 

1149 # UUIDv5 can be repeatedly ingested in direct mode. 

1150 datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct") 

1151 datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="direct") 

1152 

1153 with self.assertRaises(RuntimeError): 

1154 datastore.ingest(FileDataset(path=path, refs=v5ref), transfer="copy") 

1155 

1156 

1157class TrashDatastoreTestCase(PosixDatastoreTestCase): 

1158 """Restrict trash test to FileDatastore.""" 

1159 

1160 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1161 

1162 def testTrash(self) -> None: 

1163 datastore, *refs = self.prepDeleteTest(n_refs=10) 

1164 

1165 # Trash one of them. 

1166 ref = refs.pop() 

1167 uri = datastore.getURI(ref) 

1168 datastore.trash(ref) 

1169 self.assertTrue(uri.exists(), uri) # Not deleted yet 

1170 datastore.emptyTrash() 

1171 self.assertFalse(uri.exists(), uri) 

1172 

1173 # Trash it again should be fine. 

1174 datastore.trash(ref) 

1175 

1176 # Trash multiple items at once. 

1177 subset = [refs.pop(), refs.pop()] 

1178 datastore.trash(subset) 

1179 datastore.emptyTrash() 

1180 

1181 # Remove a record and trash should do nothing. 

1182 # This is execution butler scenario. 

1183 ref = refs.pop() 

1184 uri = datastore.getURI(ref) 

1185 datastore._table.delete(["dataset_id"], {"dataset_id": ref.id}) 

1186 self.assertTrue(uri.exists()) 

1187 datastore.trash(ref) 

1188 datastore.emptyTrash() 

1189 self.assertTrue(uri.exists()) 

1190 

1191 # Switch on trust and it should delete the file. 

1192 datastore.trustGetRequest = True 

1193 datastore.trash([ref]) 

1194 self.assertFalse(uri.exists()) 

1195 

1196 # Remove multiples at once in trust mode. 

1197 subset = [refs.pop() for i in range(3)] 

1198 datastore.trash(subset) 

1199 datastore.trash(refs.pop()) # Check that a single ref can trash 

1200 

1201 

1202class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase): 

1203 """Test datastore cleans up on failure.""" 

1204 

1205 configFile = os.path.join(TESTDIR, "config/basic/butler.yaml") 

1206 

1207 def setUp(self) -> None: 

1208 # Override the working directory before calling the base class 

1209 self.root = tempfile.mkdtemp() 

1210 super().setUp() 

1211 

1212 def testCleanup(self) -> None: 

1213 """Test that a failed formatter write does cleanup a partial file.""" 

1214 metrics = makeExampleMetrics() 

1215 datastore = self.makeDatastore() 

1216 

1217 storageClass = self.storageClassFactory.getStorageClass("StructuredData") 

1218 

1219 dimensions = self.universe.conform(("visit", "physical_filter")) 

1220 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1221 

1222 ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId) 

1223 

1224 # Determine where the file will end up (we assume Formatters use 

1225 # the same file extension) 

1226 expectedUri = datastore.getURI(ref, predict=True) 

1227 self.assertEqual(expectedUri.fragment, "predicted") 

1228 

1229 self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}") 

1230 

1231 # Try formatter that fails and formatter that fails and leaves 

1232 # a file behind 

1233 for formatter in (BadWriteFormatter, BadNoWriteFormatter): 

1234 with self.subTest(formatter=formatter): 

1235 # Monkey patch the formatter 

1236 datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True) 

1237 

1238 # Try to put the dataset, it should fail 

1239 with self.assertRaises(RuntimeError): 

1240 datastore.put(metrics, ref) 

1241 

1242 # Check that there is no file on disk 

1243 self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}") 

1244 

1245 # Check that there is a directory 

1246 dir = expectedUri.dirname() 

1247 self.assertTrue(dir.exists(), f"Check for existence of directory {dir}") 

1248 

1249 # Force YamlFormatter and check that this time a file is written 

1250 datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True) 

1251 datastore.put(metrics, ref) 

1252 self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}") 

1253 datastore.remove(ref) 

1254 self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}") 

1255 

1256 

1257class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase): 

1258 """PosixDatastore specialization""" 

1259 

1260 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml") 

1261 uriScheme = "mem" 

1262 hasUnsupportedPut = False 

1263 ingestTransferModes = () 

1264 isEphemeral = True 

1265 rootKeys = None 

1266 validationCanFail = False 

1267 

1268 

1269class ChainedDatastoreTestCase(PosixDatastoreTestCase): 

1270 """ChainedDatastore specialization using a POSIXDatastore""" 

1271 

1272 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml") 

1273 hasUnsupportedPut = False 

1274 canIngestNoTransferAuto = False 

1275 ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto") 

1276 isEphemeral = False 

1277 rootKeys = (".datastores.1.root", ".datastores.2.root") 

1278 validationCanFail = True 

1279 

1280 

1281class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase): 

1282 """ChainedDatastore specialization using all InMemoryDatastore""" 

1283 

1284 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml") 

1285 validationCanFail = False 

1286 

1287 

1288class DatastoreConstraintsTests(DatastoreTestsBase): 

1289 """Basic tests of constraints model of Datastores.""" 

1290 

1291 def testConstraints(self) -> None: 

1292 """Test constraints model. Assumes that each test class has the 

1293 same constraints. 

1294 """ 

1295 metrics = makeExampleMetrics() 

1296 datastore = self.makeDatastore() 

1297 

1298 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1299 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1300 dimensions = self.universe.conform(("visit", "physical_filter", "instrument")) 

1301 dataId = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} 

1302 

1303 # Write empty file suitable for ingest check (JSON and YAML variants) 

1304 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1305 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1306 for datasetTypeName, sc, accepted in ( 

1307 ("metric", sc1, True), 

1308 ("metric5", sc1, False), 

1309 ("metric33", sc1, True), 

1310 ("metric5", sc2, True), 

1311 ): 

1312 # Choose different temp file depending on StorageClass 

1313 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1314 

1315 with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name): 

1316 ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId) 

1317 if accepted: 

1318 datastore.put(metrics, ref) 

1319 self.assertTrue(datastore.exists(ref)) 

1320 datastore.remove(ref) 

1321 

1322 # Try ingest 

1323 if self.canIngest: 

1324 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1325 self.assertTrue(datastore.exists(ref)) 

1326 datastore.remove(ref) 

1327 else: 

1328 with self.assertRaises(DatasetTypeNotSupportedError): 

1329 datastore.put(metrics, ref) 

1330 self.assertFalse(datastore.exists(ref)) 

1331 

1332 # Again with ingest 

1333 if self.canIngest: 

1334 with self.assertRaises(DatasetTypeNotSupportedError): 

1335 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1336 self.assertFalse(datastore.exists(ref)) 

1337 

1338 

1339class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1340 """PosixDatastore specialization""" 

1341 

1342 configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml") 

1343 canIngest = True 

1344 

1345 def setUp(self) -> None: 

1346 # Override the working directory before calling the base class 

1347 self.root = tempfile.mkdtemp() 

1348 super().setUp() 

1349 

1350 

1351class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase): 

1352 """InMemoryDatastore specialization.""" 

1353 

1354 configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml") 

1355 canIngest = False 

1356 

1357 

1358class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase): 

1359 """ChainedDatastore specialization using a POSIXDatastore and constraints 

1360 at the ChainedDatstore. 

1361 """ 

1362 

1363 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml") 

1364 

1365 

1366class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase): 

1367 """ChainedDatastore specialization using a POSIXDatastore.""" 

1368 

1369 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml") 

1370 

1371 

1372class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase): 

1373 """ChainedDatastore specialization using all InMemoryDatastore.""" 

1374 

1375 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml") 

1376 canIngest = False 

1377 

1378 

1379class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase): 

1380 """Test that a chained datastore can control constraints per-datastore 

1381 even if child datastore would accept. 

1382 """ 

1383 

1384 configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml") 

1385 

1386 def setUp(self) -> None: 

1387 # Override the working directory before calling the base class 

1388 self.root = tempfile.mkdtemp() 

1389 super().setUp() 

1390 

1391 def testConstraints(self) -> None: 

1392 """Test chained datastore constraints model.""" 

1393 metrics = makeExampleMetrics() 

1394 datastore = self.makeDatastore() 

1395 

1396 sc1 = self.storageClassFactory.getStorageClass("StructuredData") 

1397 sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson") 

1398 dimensions = self.universe.conform(("visit", "physical_filter", "instrument")) 

1399 dataId1 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "DummyCamComp"} 

1400 dataId2 = {"visit": 52, "physical_filter": "V", "band": "v", "instrument": "HSC"} 

1401 

1402 # Write empty file suitable for ingest check (JSON and YAML variants) 

1403 testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml") 

1404 testfile_j = tempfile.NamedTemporaryFile(suffix=".json") 

1405 

1406 for typeName, dataId, sc, accept, ingest in ( 

1407 ("metric", dataId1, sc1, (False, True, False), True), 

1408 ("metric5", dataId1, sc1, (False, False, False), False), 

1409 ("metric5", dataId2, sc1, (True, False, False), False), 

1410 ("metric33", dataId2, sc2, (True, True, False), True), 

1411 ("metric5", dataId1, sc2, (False, True, False), True), 

1412 ): 

1413 # Choose different temp file depending on StorageClass 

1414 testfile = testfile_j if sc.name.endswith("Json") else testfile_y 

1415 

1416 with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name): 

1417 ref = self.makeDatasetRef(typeName, dimensions, sc, dataId) 

1418 if any(accept): 

1419 datastore.put(metrics, ref) 

1420 self.assertTrue(datastore.exists(ref)) 

1421 

1422 # Check each datastore inside the chained datastore 

1423 for childDatastore, expected in zip(datastore.datastores, accept, strict=True): 

1424 self.assertEqual( 

1425 childDatastore.exists(ref), 

1426 expected, 

1427 f"Testing presence of {ref} in datastore {childDatastore.name}", 

1428 ) 

1429 

1430 datastore.remove(ref) 

1431 

1432 # Check that ingest works 

1433 if ingest: 

1434 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1435 self.assertTrue(datastore.exists(ref)) 

1436 

1437 # Check each datastore inside the chained datastore 

1438 for childDatastore, expected in zip(datastore.datastores, accept, strict=True): 

1439 # Ephemeral datastores means InMemory at the moment 

1440 # and that does not accept ingest of files. 

1441 if childDatastore.isEphemeral: 

1442 expected = False 

1443 self.assertEqual( 

1444 childDatastore.exists(ref), 

1445 expected, 

1446 f"Testing presence of ingested {ref} in datastore {childDatastore.name}", 

1447 ) 

1448 

1449 datastore.remove(ref) 

1450 else: 

1451 with self.assertRaises(DatasetTypeNotSupportedError): 

1452 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1453 

1454 else: 

1455 with self.assertRaises(DatasetTypeNotSupportedError): 

1456 datastore.put(metrics, ref) 

1457 self.assertFalse(datastore.exists(ref)) 

1458 

1459 # Again with ingest 

1460 with self.assertRaises(DatasetTypeNotSupportedError): 

1461 datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link") 

1462 self.assertFalse(datastore.exists(ref)) 

1463 

1464 

1465class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase): 

1466 """Tests for datastore caching infrastructure.""" 

1467 

1468 @classmethod 

1469 def setUpClass(cls) -> None: 

1470 cls.storageClassFactory = StorageClassFactory() 

1471 cls.universe = DimensionUniverse() 

1472 

1473 # Ensure that we load the test storage class definitions. 

1474 scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml") 

1475 cls.storageClassFactory.addFromConfig(scConfigFile) 

1476 

1477 def setUp(self) -> None: 

1478 self.id = 0 

1479 

1480 # Create a root that we can use for caching tests. 

1481 self.root = tempfile.mkdtemp() 

1482 

1483 # Create some test dataset refs and associated test files 

1484 sc = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1485 dimensions = self.universe.conform(("visit", "physical_filter")) 

1486 dataId = {"instrument": "dummy", "visit": 52, "physical_filter": "V", "band": "v"} 

1487 

1488 # Create list of refs and list of temporary files 

1489 n_datasets = 10 

1490 self.refs = [self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId) for n in range(n_datasets)] 

1491 

1492 root_uri = ResourcePath(self.root, forceDirectory=True) 

1493 self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)] 

1494 

1495 # Create test files. 

1496 for uri in self.files: 

1497 uri.write(b"0123456789") 

1498 

1499 # Create some composite refs with component files. 

1500 sc = self.storageClassFactory.getStorageClass("StructuredData") 

1501 self.composite_refs = [self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId) for n in range(3)] 

1502 self.comp_files = [] 

1503 self.comp_refs = [] 

1504 for n, ref in enumerate(self.composite_refs): 

1505 component_refs = [] 

1506 component_files = [] 

1507 for component in sc.components: 

1508 component_ref = ref.makeComponentRef(component) 

1509 file = root_uri.join(f"composite_file-{n}-{component}.txt") 

1510 component_refs.append(component_ref) 

1511 component_files.append(file) 

1512 file.write(b"9876543210") 

1513 

1514 self.comp_files.append(component_files) 

1515 self.comp_refs.append(component_refs) 

1516 

1517 def tearDown(self) -> None: 

1518 if self.root is not None and os.path.exists(self.root): 

1519 shutil.rmtree(self.root, ignore_errors=True) 

1520 

1521 def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager: 

1522 config = Config.fromYaml(config_str) 

1523 return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe) 

1524 

1525 def testNoCacheDir(self) -> None: 

1526 config_str = """ 

1527cached: 

1528 root: null 

1529 cacheable: 

1530 metric0: true 

1531 """ 

1532 cache_manager = self._make_cache_manager(config_str) 

1533 

1534 # Look inside to check we don't have a cache directory 

1535 self.assertIsNone(cache_manager._cache_directory) 

1536 

1537 self.assertCache(cache_manager) 

1538 

1539 # Test that the cache directory is marked temporary 

1540 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1541 

1542 def testNoCacheDirReversed(self) -> None: 

1543 """Use default caching status and metric1 to false""" 

1544 config_str = """ 

1545cached: 

1546 root: null 

1547 default: true 

1548 cacheable: 

1549 metric1: false 

1550 """ 

1551 cache_manager = self._make_cache_manager(config_str) 

1552 

1553 self.assertCache(cache_manager) 

1554 

1555 def testEnvvarCacheDir(self) -> None: 

1556 config_str = f""" 

1557cached: 

1558 root: '{self.root}' 

1559 cacheable: 

1560 metric0: true 

1561 """ 

1562 

1563 root = ResourcePath(self.root, forceDirectory=True) 

1564 env_dir = root.join("somewhere", forceDirectory=True) 

1565 elsewhere = root.join("elsewhere", forceDirectory=True) 

1566 

1567 # Environment variable should override the config value. 

1568 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1569 cache_manager = self._make_cache_manager(config_str) 

1570 self.assertEqual(cache_manager.cache_directory, env_dir) 

1571 

1572 # This environment variable should not override the config value. 

1573 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1574 cache_manager = self._make_cache_manager(config_str) 

1575 self.assertEqual(cache_manager.cache_directory, root) 

1576 

1577 # No default setting. 

1578 config_str = """ 

1579cached: 

1580 root: null 

1581 default: true 

1582 cacheable: 

1583 metric1: false 

1584 """ 

1585 cache_manager = self._make_cache_manager(config_str) 

1586 

1587 # This environment variable should override the config value. 

1588 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1589 cache_manager = self._make_cache_manager(config_str) 

1590 self.assertEqual(cache_manager.cache_directory, env_dir) 

1591 

1592 # If both environment variables are set the main (not IF_UNSET) 

1593 # variable should win. 

1594 with unittest.mock.patch.dict( 

1595 os.environ, 

1596 { 

1597 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1598 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1599 }, 

1600 ): 

1601 cache_manager = self._make_cache_manager(config_str) 

1602 self.assertEqual(cache_manager.cache_directory, env_dir) 

1603 

1604 # Use the API to set the environment variable, making sure that the 

1605 # variable is reset on exit. 

1606 with unittest.mock.patch.dict( 

1607 os.environ, 

1608 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1609 ): 

1610 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1611 self.assertTrue(defined) 

1612 cache_manager = self._make_cache_manager(config_str) 

1613 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1614 

1615 # Now create the cache manager ahead of time and set the fallback 

1616 # later. 

1617 cache_manager = self._make_cache_manager(config_str) 

1618 self.assertIsNone(cache_manager._cache_directory) 

1619 with unittest.mock.patch.dict( 

1620 os.environ, 

1621 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1622 ): 

1623 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1624 self.assertTrue(defined) 

1625 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1626 

1627 def testExplicitCacheDir(self) -> None: 

1628 config_str = f""" 

1629cached: 

1630 root: '{self.root}' 

1631 cacheable: 

1632 metric0: true 

1633 """ 

1634 cache_manager = self._make_cache_manager(config_str) 

1635 

1636 # Look inside to check we do have a cache directory. 

1637 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1638 

1639 self.assertCache(cache_manager) 

1640 

1641 # Test that the cache directory is not marked temporary 

1642 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1643 

1644 def assertCache(self, cache_manager: DatastoreCacheManager) -> None: 

1645 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1646 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1647 

1648 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1649 self.assertIsInstance(uri, ResourcePath) 

1650 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1651 

1652 # Check presence in cache using ref and then using file extension. 

1653 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1654 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1655 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1656 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1657 

1658 # Cached file should no longer exist but uncached file should be 

1659 # unaffected. 

1660 self.assertFalse(self.files[0].exists()) 

1661 self.assertTrue(self.files[1].exists()) 

1662 

1663 # Should find this file and it should be within the cache directory. 

1664 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1665 self.assertTrue(found.exists()) 

1666 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1667 

1668 # Should not be able to find these in cache 

1669 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1670 self.assertIsNone(found) 

1671 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1672 self.assertIsNone(found) 

1673 

1674 def testNoCache(self) -> None: 

1675 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1676 for uri, ref in zip(self.files, self.refs, strict=True): 

1677 self.assertFalse(cache_manager.should_be_cached(ref)) 

1678 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1679 self.assertFalse(cache_manager.known_to_cache(ref)) 

1680 with cache_manager.find_in_cache(ref, ".txt") as found: 

1681 self.assertIsNone(found, msg=f"{cache_manager}") 

1682 

1683 def _expiration_config(self, mode: str, threshold: int) -> str: 

1684 return f""" 

1685cached: 

1686 default: true 

1687 expiry: 

1688 mode: {mode} 

1689 threshold: {threshold} 

1690 cacheable: 

1691 unused: true 

1692 """ 

1693 

1694 def testCacheExpiryFiles(self) -> None: 

1695 threshold = 2 # Keep at least 2 files. 

1696 mode = "files" 

1697 config_str = self._expiration_config(mode, threshold) 

1698 

1699 cache_manager = self._make_cache_manager(config_str) 

1700 

1701 # Check that an empty cache returns unknown for arbitrary ref 

1702 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1703 

1704 # Should end with datasets: 2, 3, 4 

1705 self.assertExpiration(cache_manager, 5, threshold + 1) 

1706 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1707 

1708 # Check that we will not expire a file that is actively in use. 

1709 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1710 self.assertIsNotNone(found) 

1711 

1712 # Trigger cache expiration that should remove the file 

1713 # we just retrieved. Should now have: 3, 4, 5 

1714 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1715 self.assertIsNotNone(cached) 

1716 

1717 # Cache should still report the standard file count. 

1718 self.assertEqual(cache_manager.file_count, threshold + 1) 

1719 

1720 # Add additional entry to cache. 

1721 # Should now have 4, 5, 6 

1722 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1723 self.assertIsNotNone(cached) 

1724 

1725 # Is the file still there? 

1726 self.assertTrue(found.exists()) 

1727 

1728 # Can we read it? 

1729 data = found.read() 

1730 self.assertGreater(len(data), 0) 

1731 

1732 # Outside context the file should no longer exist. 

1733 self.assertFalse(found.exists()) 

1734 

1735 # File count should not have changed. 

1736 self.assertEqual(cache_manager.file_count, threshold + 1) 

1737 

1738 # Dataset 2 was in the exempt directory but because hardlinks 

1739 # are used it was deleted from the main cache during cache expiry 

1740 # above and so should no longer be found. 

1741 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1742 self.assertIsNone(found) 

1743 

1744 # And the one stored after it is also gone. 

1745 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1746 self.assertIsNone(found) 

1747 

1748 # But dataset 4 is present. 

1749 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1750 self.assertIsNotNone(found) 

1751 

1752 # Adding a new dataset to the cache should now delete it. 

1753 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1754 

1755 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1756 self.assertIsNone(found) 

1757 

1758 def testCacheExpiryDatasets(self) -> None: 

1759 threshold = 2 # Keep 2 datasets. 

1760 mode = "datasets" 

1761 config_str = self._expiration_config(mode, threshold) 

1762 

1763 cache_manager = self._make_cache_manager(config_str) 

1764 self.assertExpiration(cache_manager, 5, threshold + 1) 

1765 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1766 

1767 def testCacheExpiryDatasetsComposite(self) -> None: 

1768 threshold = 2 # Keep 2 datasets. 

1769 mode = "datasets" 

1770 config_str = self._expiration_config(mode, threshold) 

1771 

1772 cache_manager = self._make_cache_manager(config_str) 

1773 

1774 n_datasets = 3 

1775 for i in range(n_datasets): 

1776 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i], strict=True): 

1777 cached = cache_manager.move_to_cache(component_file, component_ref) 

1778 self.assertIsNotNone(cached) 

1779 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1780 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1781 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1782 

1783 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1784 

1785 # Write two new non-composite and the number of files should drop. 

1786 self.assertExpiration(cache_manager, 2, 5) 

1787 

1788 def testCacheExpirySize(self) -> None: 

1789 threshold = 55 # Each file is 10 bytes 

1790 mode = "size" 

1791 config_str = self._expiration_config(mode, threshold) 

1792 

1793 cache_manager = self._make_cache_manager(config_str) 

1794 self.assertExpiration(cache_manager, 10, 6) 

1795 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1796 

1797 def assertExpiration( 

1798 self, cache_manager: DatastoreCacheManager, n_datasets: int, n_retained: int 

1799 ) -> None: 

1800 """Insert the datasets and then check the number retained.""" 

1801 for i in range(n_datasets): 

1802 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1803 self.assertIsNotNone(cached) 

1804 

1805 self.assertEqual(cache_manager.file_count, n_retained) 

1806 

1807 # The oldest file should not be in the cache any more. 

1808 for i in range(n_datasets): 

1809 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1810 if i >= n_datasets - n_retained: 

1811 self.assertIsInstance(found, ResourcePath) 

1812 else: 

1813 self.assertIsNone(found) 

1814 

1815 def testCacheExpiryAge(self) -> None: 

1816 threshold = 1 # Expire older than 2 seconds 

1817 mode = "age" 

1818 config_str = self._expiration_config(mode, threshold) 

1819 

1820 cache_manager = self._make_cache_manager(config_str) 

1821 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1822 

1823 # Insert 3 files, then sleep, then insert more. 

1824 for i in range(2): 

1825 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1826 self.assertIsNotNone(cached) 

1827 time.sleep(2.0) 

1828 for j in range(4): 

1829 i = 2 + j # Continue the counting 

1830 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1831 self.assertIsNotNone(cached) 

1832 

1833 # Only the files written after the sleep should exist. 

1834 self.assertEqual(cache_manager.file_count, 4) 

1835 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1836 self.assertIsNone(found) 

1837 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1838 self.assertIsInstance(found, ResourcePath) 

1839 

1840 

1841class NullDatastoreTestCase(DatasetTestHelper, unittest.TestCase): 

1842 """Test the null datastore.""" 

1843 

1844 storageClassFactory = StorageClassFactory() 

1845 

1846 def test_basics(self) -> None: 

1847 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1848 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1849 

1850 null = NullDatastore(None, None) 

1851 

1852 self.assertFalse(null.exists(ref)) 

1853 self.assertFalse(null.knows(ref)) 

1854 knows = null.knows_these([ref]) 

1855 self.assertFalse(knows[ref]) 

1856 null.validateConfiguration(ref) 

1857 

1858 with self.assertRaises(FileNotFoundError): 

1859 null.get(ref) 

1860 with self.assertRaises(NotImplementedError): 

1861 null.put("", ref) 

1862 with self.assertRaises(FileNotFoundError): 

1863 null.getURI(ref) 

1864 with self.assertRaises(FileNotFoundError): 

1865 null.getURIs(ref) 

1866 with self.assertRaises(FileNotFoundError): 

1867 null.getManyURIs([ref]) 

1868 with self.assertRaises(NotImplementedError): 

1869 null.getLookupKeys() 

1870 with self.assertRaises(NotImplementedError): 

1871 null.import_records({}) 

1872 with self.assertRaises(NotImplementedError): 

1873 null.export_records([]) 

1874 with self.assertRaises(NotImplementedError): 

1875 null.export([ref]) 

1876 with self.assertRaises(NotImplementedError): 

1877 null.transfer(null, ref) 

1878 with self.assertRaises(NotImplementedError): 

1879 null.emptyTrash() 

1880 with self.assertRaises(NotImplementedError): 

1881 null.trash(ref) 

1882 with self.assertRaises(NotImplementedError): 

1883 null.forget([ref]) 

1884 with self.assertRaises(NotImplementedError): 

1885 null.remove(ref) 

1886 with self.assertRaises(NotImplementedError): 

1887 null.retrieveArtifacts([ref], ResourcePath(".")) 

1888 with self.assertRaises(NotImplementedError): 

1889 null.transfer_from(null, [ref]) 

1890 with self.assertRaises(NotImplementedError): 

1891 null.ingest() 

1892 

1893 

1894class DatasetRefURIsTestCase(unittest.TestCase): 

1895 """Tests for DatasetRefURIs.""" 

1896 

1897 def testSequenceAccess(self) -> None: 

1898 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1899 uris = DatasetRefURIs() 

1900 

1901 self.assertEqual(len(uris), 2) 

1902 self.assertEqual(uris[0], None) 

1903 self.assertEqual(uris[1], {}) 

1904 

1905 primaryURI = ResourcePath("1/2/3") 

1906 componentURI = ResourcePath("a/b/c") 

1907 

1908 # affirm that DatasetRefURIs does not support MutableSequence functions 

1909 with self.assertRaises(TypeError): 

1910 uris[0] = primaryURI 

1911 with self.assertRaises(TypeError): 

1912 uris[1] = {"foo": componentURI} 

1913 

1914 # but DatasetRefURIs can be set by property name: 

1915 uris.primaryURI = primaryURI 

1916 uris.componentURIs = {"foo": componentURI} 

1917 self.assertEqual(uris.primaryURI, primaryURI) 

1918 self.assertEqual(uris[0], primaryURI) 

1919 

1920 primary, components = uris 

1921 self.assertEqual(primary, primaryURI) 

1922 self.assertEqual(components, {"foo": componentURI}) 

1923 

1924 def testRepr(self) -> None: 

1925 """Verify __repr__ output.""" 

1926 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1927 self.assertEqual( 

1928 repr(uris), 

1929 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1930 ) 

1931 

1932 

1933class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1934 """Test the StoredFileInfo class.""" 

1935 

1936 storageClassFactory = StorageClassFactory() 

1937 

1938 def test_StoredFileInfo(self) -> None: 

1939 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1940 ref = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1941 

1942 record = dict( 

1943 storage_class="StructuredDataDict", 

1944 formatter="lsst.daf.butler.Formatter", 

1945 path="a/b/c.txt", 

1946 component="component", 

1947 checksum=None, 

1948 file_size=5, 

1949 ) 

1950 info = StoredFileInfo.from_record(record) 

1951 

1952 self.assertEqual(info.to_record(), record) 

1953 

1954 ref2 = self.makeDatasetRef("metric", DimensionUniverse().empty, storageClass, {}) 

1955 rebased = info.rebase(ref2) 

1956 self.assertEqual(rebased.rebase(ref), info) 

1957 

1958 with self.assertRaises(TypeError): 

1959 rebased.update(formatter=42) 

1960 

1961 with self.assertRaises(ValueError): 

1962 rebased.update(something=42, new="42") 

1963 

1964 # Check that pickle works on StoredFileInfo. 

1965 pickled_info = pickle.dumps(info) 

1966 unpickled_info = pickle.loads(pickled_info) 

1967 self.assertEqual(unpickled_info, info) 

1968 

1969 

1970@contextlib.contextmanager 

1971def _temp_yaml_file(data: Any) -> Iterator[str]: 

1972 fh = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml") 

1973 try: 

1974 yaml.dump(data, stream=fh) 

1975 fh.flush() 

1976 yield fh.name 

1977 finally: 

1978 # Some tests delete the file 

1979 with contextlib.suppress(FileNotFoundError): 

1980 fh.close() 

1981 

1982 

1983if __name__ == "__main__": 

1984 unittest.main()