# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

import os
import pickle
import shutil
import sys
import tempfile
import time
import unittest
import unittest.mock
import uuid
from collections import UserDict
from dataclasses import dataclass

import lsst.utils.tests
import yaml
from lsst.daf.butler import (
    Config,
    DataCoordinate,
    DatasetRef,
    DatasetRefURIs,
    DatasetType,
    DatasetTypeNotSupportedError,
    Datastore,
    DatastoreCacheManager,
    DatastoreCacheManagerConfig,
    DatastoreConfig,
    DatastoreDisabledCacheManager,
    DatastoreValidationError,
    DimensionUniverse,
    FileDataset,
    NamedKeyDict,
    StorageClass,
    StorageClassFactory,
    StoredFileInfo,
)
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.daf.butler.tests import (
    BadNoWriteFormatter,
    BadWriteFormatter,
    DatasetTestHelper,
    DatastoreTestHelper,
    DummyRegistry,
    MetricsExample,
    MetricsExampleDataclass,
    MetricsExampleModel,
)
from lsst.daf.butler.tests.dict_convertible_model import DictConvertibleModel
from lsst.resources import ResourcePath
from lsst.utils import doImport

TESTDIR = os.path.dirname(__file__)


class DataIdForTest(UserDict):
    """A hashable dict-like class that can be used as a DataId dict.

    By default the class is immutable ("frozen").  The `frozen`
    attribute can be set to `False` to change values (but note that
    the hash values before and after mutation will be different!).
    """

    def __init__(self, *args, **kwargs):
        self.frozen = False
        super().__init__(*args, **kwargs)
        self.frozen = True

    def __hash__(self):
        return hash(str(self.data))

    def __setitem__(self, k, v):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__setitem__(k, v)

    def __delitem__(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__delitem__(k)

    def __ior__(self, other):
        assert sys.version_info[0] == 3
        if sys.version_info[1] < 9:
            raise NotImplementedError("operator |= (ior) is not supported before version 3.9")
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().__ior__(other)

    def pop(self, k):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().pop(k)

    def popitem(self):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        return super().popitem()

    def update(self, *args, **kwargs):
        if self.frozen:
            raise RuntimeError("DataIdForTest is frozen.")
        super().update(*args, **kwargs)

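# A minimal usage sketch (not one of the tests) of the helper above:
# DataIdForTest hashes by value and refuses mutation unless ``frozen``
# is reset first.
def _example_frozen_data_id() -> None:
    data_id = DataIdForTest({"instrument": "dummy", "visit": 52})
    # Same content and insertion order, so the string-based hashes agree.
    assert hash(data_id) == hash(DataIdForTest({"instrument": "dummy", "visit": 52}))
    try:
        data_id["visit"] = 53
    except RuntimeError:
        pass  # Expected: the mapping is frozen after construction.
    data_id.frozen = False
    data_id["visit"] = 53  # Allowed now, but the hash value changes.
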

def makeExampleMetrics(use_none=False):
    if use_none:
        array = None
    else:
        array = [563, 234, 456.7, 105, 2054, -1045]
    return MetricsExample(
        {"AM1": 5.2, "AM2": 30.6},
        {"a": [1, 2, 3], "b": {"blue": 5, "red": "green"}},
        array,
    )

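# For reference (not a test): the helper above builds an object equivalent to
#     MetricsExample({"AM1": 5.2, "AM2": 30.6}, {"a": [1, 2, 3], ...}, [563, ...])
# and with use_none=True the final (array) component is None, which the
# component round-trip checks below rely on.
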

@dataclass(frozen=True)
class Named:
    name: str


class FakeDataCoordinate(NamedKeyDict):
    """A fake hashable frozen DataCoordinate built from a simple dict."""

    @classmethod
    def from_dict(cls, dataId):
        new = cls()
        for k, v in dataId.items():
            new[Named(k)] = v
        return new.freeze()

    def __hash__(self) -> int:
        return hash(frozenset(self.items()))

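# A minimal sketch (not a test) of how the suites below use this stand-in.
# It assumes, as the tests themselves do, that NamedKeyDict.freeze() returns
# the frozen instance.
def _example_fake_data_coordinate() -> None:
    a = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
    b = FakeDataCoordinate.from_dict({"instrument": "dummy", "visit": 638})
    assert hash(a) == hash(b)  # Equal content hashes equally.
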

class TransactionTestError(Exception):
    """Specific error for transactions, to prevent misdiagnosis
    that might otherwise occur when a standard exception is used.
    """

    pass

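# The transaction tests below all follow the same rollback pattern, sketched
# here for orientation (illustrative pseudo-usage, not a test):
#
#     with contextlib.suppress(TransactionTestError):
#         with datastore.transaction():
#             datastore.put(metrics, ref)
#             raise TransactionTestError("roll everything back")
#     assert not datastore.exists(ref)
#
# Raising this dedicated exception type ensures the tests cannot mistake an
# unrelated failure for a deliberately triggered rollback.
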

class DatastoreTestsBase(DatasetTestHelper, DatastoreTestHelper):
    """Support routines for datastore testing."""

    root = None

    @classmethod
    def setUpClass(cls):
        # Storage Classes are fixed for all datastores in these tests
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory = StorageClassFactory()
        cls.storageClassFactory.addFromConfig(scConfigFile)

        # Read the Datastore config so we can get the class
        # information (since we should not assume the constructor
        # name here, but rely on the configuration file itself)
        datastoreConfig = DatastoreConfig(cls.configFile)
        cls.datastoreType = doImport(datastoreConfig["cls"])
        cls.universe = DimensionUniverse()

    def setUp(self):
        self.setUpDatastoreTests(DummyRegistry, DatastoreConfig)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

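# The suites below follow a template pattern: DatastoreTests defines the
# shared test methods, and each concrete subclass mixes in unittest.TestCase
# and supplies class attributes describing the datastore flavor under test.
# A hypothetical new flavor would be wired up roughly like this (the config
# path here is an assumption for illustration, not a file in this package):
#
#     class MyDatastoreTestCase(DatastoreTests, unittest.TestCase):
#         configFile = os.path.join(TESTDIR, "config/basic/myDatastore.yaml")
#         uriScheme = "file"
#         canIngestNoTransferAuto = True
#         ingestTransferModes = ("copy", "move", "auto")
#         isEphemeral = False
#         rootKeys = ("root",)
#         validationCanFail = True
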

class DatastoreTests(DatastoreTestsBase):
    """Some basic tests of a simple datastore."""

    hasUnsupportedPut = True

    def testConfigRoot(self):
        full = DatastoreConfig(self.configFile)
        config = DatastoreConfig(self.configFile, mergeDefaults=False)
        newroot = "/random/location"
        self.datastoreType.setConfigRoot(newroot, config, full)
        if self.rootKeys:
            for k in self.rootKeys:
                self.assertIn(newroot, config[k])

    def testConstructor(self):
        datastore = self.makeDatastore()
        self.assertIsNotNone(datastore)
        self.assertIs(datastore.isEphemeral, self.isEphemeral)

    def testConfigurationValidation(self):
        datastore = self.makeDatastore()
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        datastore.validateConfiguration([sc])

        sc2 = self.storageClassFactory.getStorageClass("ThingTwo")
        if self.validationCanFail:
            with self.assertRaises(DatastoreValidationError):
                datastore.validateConfiguration([sc2], logFailures=True)

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore.validateConfiguration([ref])

    def testParameterValidation(self):
        """Check that parameters are validated."""
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        datastore = self.makeDatastore()
        data = {1: 2, 3: 4}
        datastore.put(data, ref)
        newdata = datastore.get(ref)
        self.assertEqual(data, newdata)
        with self.assertRaises(KeyError):
            newdata = datastore.get(ref, parameters={"missing": 5})

    def testBasicPutGet(self):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create multiple storage classes for testing different formulations
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in ("StructuredData", "StructuredDataJson", "StructuredDataPickle")
        ]

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        dataId2 = DataIdForTest({"instrument": "dummy", "visit": 53, "physical_filter": "V"})

        for sc in storageClasses:
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            ref2 = self.makeDatasetRef("metric", dimensions, sc, dataId2, conform=False)

            # Make sure that using getManyURIs without predicting before the
            # dataset has been put raises.
            with self.assertRaises(FileNotFoundError):
                datastore.getManyURIs([ref], predict=False)

            # Make sure that using getManyURIs with predicting before the
            # dataset has been put predicts the URI.
            uris = datastore.getManyURIs([ref, ref2], predict=True)
            self.assertIn("52", uris[ref].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref].primaryURI.geturl())
            self.assertIn("53", uris[ref2].primaryURI.geturl())
            self.assertIn("#predicted", uris[ref2].primaryURI.geturl())

            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref, ref2])
            self.assertTrue(multi[ref])
            self.assertFalse(multi[ref2])

            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)

            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)

            uris = datastore.getManyURIs([ref])
            self.assertEqual(len(uris), 1)
            ref, uri = uris.popitem()
            self.assertTrue(uri.primaryURI.exists())
            self.assertFalse(uri.componentURIs)

            # Get a component -- we need to construct new refs for them
            # with derived storage classes but with parent ID
            for comp in ("data", "output"):
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metricsOut, comp))

                uri = datastore.getURI(compRef)
                self.assertEqual(uri.scheme, self.uriScheme)

                uris = datastore.getManyURIs([compRef])
                self.assertEqual(len(uris), 1)

        storageClass = sc

        # Check that we can put a metric with None in a component and
        # get it back as None
        metricsNone = makeExampleMetrics(use_none=True)
        dataIdNone = {"instrument": "dummy", "visit": 54, "physical_filter": "V"}
        refNone = self.makeDatasetRef("metric", dimensions, sc, dataIdNone, conform=False)
        datastore.put(metricsNone, refNone)

        for comp in ("data", "output"):
            compRef = refNone.makeComponentRef(comp)
            output = datastore.get(compRef)
            self.assertEqual(output, getattr(metricsNone, comp))

        # Check that a put fails if the dataset type is not supported
        if self.hasUnsupportedPut:
            sc = StorageClass("UnsupportedSC", pytype=type(metrics))
            ref = self.makeDatasetRef("unsupportedType", dimensions, sc, dataId)
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.put(metrics, ref)

        # These should raise
        ref = self.makeDatasetRef("metrics", dimensions, storageClass, dataId)
        with self.assertRaises(FileNotFoundError):
            # non-existing file
            datastore.get(ref)

        # Get a URI from it
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.scheme, self.uriScheme)

        with self.assertRaises(FileNotFoundError):
            datastore.getURI(ref)

    def testTrustGetRequest(self):
        """Check that we can get datasets that the registry knows nothing
        about."""

        datastore = self.makeDatastore()

        # Skip test if the attribute is not defined
        if not hasattr(datastore, "trustGetRequest"):
            return

        metrics = makeExampleMetrics()

        for i, sc_name in enumerate(
            ("StructuredDataNoComponents", "StructuredData", "StructuredComposite"), start=1
        ):
            datasetTypeName = f"test_metric{i}"  # Different dataset type name each time.

            disassembled = sc_name == "StructuredComposite"

            # Start datastore in default configuration of using registry
            datastore.trustGetRequest = False

            # Create multiple storage classes for testing with or without
            # disassembly
            sc = self.storageClassFactory.getStorageClass(sc_name)
            dimensions = self.universe.extract(("visit", "physical_filter"))

            dataId = DataIdForTest({"instrument": "dummy", "visit": 52 + i, "physical_filter": "V"})

            ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            self.assertTrue(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertTrue(multi[ref])
            multi = datastore.mexists([ref])
            self.assertTrue(multi[ref])

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)

            # Get the URI(s)
            primaryURI, componentURIs = datastore.getURIs(ref)
            if disassembled:
                self.assertIsNone(primaryURI)
                self.assertEqual(len(componentURIs), 3)
            else:
                self.assertIn(datasetTypeName, primaryURI.path)
                self.assertFalse(componentURIs)

            # Delete the datastore record so that only the file remains.
            datastore.removeStoredItemInfo(ref)

            # With trust still disabled, everything should now fail.
            datastore.trustGetRequest = False

            # Does it exist?
            self.assertFalse(datastore.exists(ref))
            self.assertFalse(datastore.knows(ref))
            multi = datastore.knows_these([ref])
            self.assertFalse(multi[ref])
            multi = datastore.mexists([ref])
            self.assertFalse(multi[ref])

            with self.assertRaises(FileNotFoundError):
                datastore.get(ref)

            if sc_name != "StructuredDataNoComponents":
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref.makeComponentRef("data"))

            # URI should fail unless we ask for prediction
            with self.assertRaises(FileNotFoundError):
                datastore.getURIs(ref)

            predicted_primary, predicted_disassembled = datastore.getURIs(ref, predict=True)
            if disassembled:
                self.assertIsNone(predicted_primary)
                self.assertEqual(len(predicted_disassembled), 3)
                for uri in predicted_disassembled.values():
                    self.assertEqual(uri.fragment, "predicted")
                    self.assertIn(datasetTypeName, uri.path)
            else:
                self.assertIn(datasetTypeName, predicted_primary.path)
                self.assertFalse(predicted_disassembled)
                self.assertEqual(predicted_primary.fragment, "predicted")

            # Now enable registry-free trusting mode
            datastore.trustGetRequest = True

            # Try again to get it
            metricsOut = datastore.get(ref)
            self.assertEqual(metricsOut, metrics)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get a component
            if sc_name != "StructuredDataNoComponents":
                comp = "data"
                compRef = ref.makeComponentRef(comp)
                output = datastore.get(compRef)
                self.assertEqual(output, getattr(metrics, comp))

            # Get the URI -- if we trust this should work even without
            # enabling prediction.
            primaryURI2, componentURIs2 = datastore.getURIs(ref)
            self.assertEqual(primaryURI2, primaryURI)
            self.assertEqual(componentURIs2, componentURIs)

            # Check for compatible storage class.
            if sc_name in ("StructuredDataNoComponents", "StructuredData"):
                # Make new dataset ref with compatible storage class.
                ref_comp = ref.overrideStorageClass("StructuredDataDictJson")

                # Without `set_retrieve_dataset_type_method` it will fail to
                # find the correct file.
                self.assertFalse(datastore.exists(ref_comp))
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref_comp)
                with self.assertRaises(FileNotFoundError):
                    datastore.get(ref, storageClass="StructuredDataDictJson")

                # Need a special method to generate stored dataset type.
                def _stored_dataset_type(name: str) -> DatasetType:
                    if name == ref.datasetType.name:
                        return ref.datasetType
                    raise ValueError(f"Unexpected dataset type name {name}")

                datastore.set_retrieve_dataset_type_method(_stored_dataset_type)

                # Storage class override with original dataset ref.
                metrics_as_dict = datastore.get(ref, storageClass="StructuredDataDictJson")
                self.assertIsInstance(metrics_as_dict, dict)

                # get() should return a dict now.
                metrics_as_dict = datastore.get(ref_comp)
                self.assertIsInstance(metrics_as_dict, dict)

                # exists() should work as well.
                self.assertTrue(datastore.exists(ref_comp))

                datastore.set_retrieve_dataset_type_method(None)

    def testDisassembly(self):
        """Test disassembly within datastore."""
        metrics = makeExampleMetrics()
        if self.isEphemeral:
            # in-memory datastore does not disassemble
            return

        # Create multiple storage classes for testing different formulations
        # of composites. One of these will not disassemble to provide
        # a reference.
        storageClasses = [
            self.storageClassFactory.getStorageClass(sc)
            for sc in (
                "StructuredComposite",
                "StructuredCompositeTestA",
                "StructuredCompositeTestB",
                "StructuredCompositeReadComp",
                "StructuredData",  # No disassembly
                "StructuredCompositeReadCompNoDisassembly",
            )
        ]

        # Create the test datastore
        datastore = self.makeDatastore()

        # Dummy dataId
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 428, "physical_filter": "R"})

        for i, sc in enumerate(storageClasses):
            with self.subTest(storageClass=sc.name):
                # Create a different dataset type each time round
                # so that a test failure in this subtest does not trigger
                # a cascade of failures because of file clashes
                ref = self.makeDatasetRef(f"metric_comp_{i}", dimensions, sc, dataId, conform=False)

                disassembled = sc.name not in {"StructuredData", "StructuredCompositeReadCompNoDisassembly"}

                datastore.put(metrics, ref)

                baseURI, compURIs = datastore.getURIs(ref)
                if disassembled:
                    self.assertIsNone(baseURI)
                    self.assertEqual(set(compURIs), {"data", "output", "summary"})
                else:
                    self.assertIsNotNone(baseURI)
                    self.assertEqual(compURIs, {})

                metrics_get = datastore.get(ref)
                self.assertEqual(metrics_get, metrics)

                # Retrieve the composite with read parameter
                stop = 4
                metrics_get = datastore.get(ref, parameters={"slice": slice(stop)})
                self.assertEqual(metrics_get.summary, metrics.summary)
                self.assertEqual(metrics_get.output, metrics.output)
                self.assertEqual(metrics_get.data, metrics.data[:stop])

                # Retrieve a component
                data = datastore.get(ref.makeComponentRef("data"))
                self.assertEqual(data, metrics.data)

                # On supported storage classes attempt to access a read-only
                # component
                if "ReadComp" in sc.name:
                    cRef = ref.makeComponentRef("counter")
                    counter = datastore.get(cRef)
                    self.assertEqual(counter, len(metrics.data))

                    counter = datastore.get(cRef, parameters={"slice": slice(stop)})
                    self.assertEqual(counter, stop)

                datastore.remove(ref)

    def prepDeleteTest(self, n_refs=1):
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()
        # Put
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        refs = []
        for i in range(n_refs):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": 638 + i, "physical_filter": "U"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)

            # Does it exist?
            self.assertTrue(datastore.exists(ref))

            # Get
            metricsOut = datastore.get(ref)
            self.assertEqual(metrics, metricsOut)
            refs.append(ref)

        return datastore, *refs

    def testRemove(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.remove(ref)

        # Does it exist?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)
        # Can only delete once
        with self.assertRaises(FileNotFoundError):
            datastore.remove(ref)

    def testForget(self):
        datastore, ref = self.prepDeleteTest()

        # Remove
        datastore.forget([ref])

        # Does it exist (as far as we know)?
        self.assertFalse(datastore.exists(ref))

        # Do we now get a predicted URI?
        uri = datastore.getURI(ref, predict=True)
        self.assertEqual(uri.fragment, "predicted")

        # Get should now fail
        with self.assertRaises(FileNotFoundError):
            datastore.get(ref)

        # Forgetting again is a silent no-op
        datastore.forget([ref])

        # Predicted URI should still point to the file.
        self.assertTrue(uri.exists())

    def testTransfer(self):
        metrics = makeExampleMetrics()

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 2048, "physical_filter": "Uprime"})

        sc = self.storageClassFactory.getStorageClass("StructuredData")
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)

        inputDatastore = self.makeDatastore("test_input_datastore")
        outputDatastore = self.makeDatastore("test_output_datastore")

        inputDatastore.put(metrics, ref)
        outputDatastore.transfer(inputDatastore, ref)

        metricsOut = outputDatastore.get(ref)
        self.assertEqual(metrics, metricsOut)

    def testBasicTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        nDatasets = 6
        dataIds = [
            DataIdForTest({"instrument": "dummy", "visit": i, "physical_filter": "V"})
            for i in range(nDatasets)
        ]
        data = [
            (
                self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False),
                makeExampleMetrics(),
            )
            for dataId in dataIds
        ]
        succeed = data[: nDatasets // 2]
        fail = data[nDatasets // 2 :]
        # All datasets added in this transaction should continue to exist
        with datastore.transaction():
            for ref, metrics in succeed:
                datastore.put(metrics, ref)
        # Whereas datasets added in this transaction should not
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                for ref, metrics in fail:
                    datastore.put(metrics, ref)
                raise TransactionTestError("This should propagate out of the context manager")
        # Check for datasets that should exist
        for ref, metrics in succeed:
            # Does it exist?
            self.assertTrue(datastore.exists(ref))
            # Get
            metricsOut = datastore.get(ref, parameters=None)
            self.assertEqual(metrics, metricsOut)
            # URI
            uri = datastore.getURI(ref)
            self.assertEqual(uri.scheme, self.uriScheme)
        # Check for datasets that should not exist
        for ref, _ in fail:
            # These should raise
            with self.assertRaises(FileNotFoundError):
                # non-existing file
                datastore.get(ref)
            with self.assertRaises(FileNotFoundError):
                datastore.getURI(ref)

    def testNestedTransaction(self):
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        refBefore = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        datastore.put(metrics, refBefore)
        with self.assertRaises(TransactionTestError):
            with datastore.transaction():
                dataId = DataIdForTest({"instrument": "dummy", "visit": 1, "physical_filter": "V"})
                refOuter = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                datastore.put(metrics, refOuter)
                with datastore.transaction():
                    dataId = DataIdForTest({"instrument": "dummy", "visit": 2, "physical_filter": "V"})
                    refInner = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
                    datastore.put(metrics, refInner)
                # All datasets should exist
                for ref in (refBefore, refOuter, refInner):
                    metricsOut = datastore.get(ref, parameters=None)
                    self.assertEqual(metrics, metricsOut)
                raise TransactionTestError("This should roll back the transaction")
        # Dataset(s) inserted before the transaction should still exist
        metricsOut = datastore.get(refBefore, parameters=None)
        self.assertEqual(metrics, metricsOut)
        # But all datasets inserted during the (rolled back) transaction
        # should be gone
        with self.assertRaises(FileNotFoundError):
            datastore.get(refOuter)
        with self.assertRaises(FileNotFoundError):
            datastore.get(refInner)

    def _prepareIngestTest(self):
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()
        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        return metrics, ref

    def runIngestTest(self, func, expectOutput=True):
        metrics, ref = self._prepareIngestTest()
        # The file will be deleted after the test.
        # For symlink tests this leads to a situation where the datastore
        # points to a file that does not exist. This will make os.path.exists
        # return False, but the new symlink will then fail with
        # FileExistsError later in the code, so the test still passes.
        with lsst.utils.tests.getTempFilePath(".yaml", expectOutput=expectOutput) as path:
            with open(path, "w") as fd:
                yaml.dump(metrics._asdict(), stream=fd)
            func(metrics, path, ref)

    def testIngestNoTransfer(self):
        """Test ingesting existing files with no transfer."""
        for mode in (None, "auto"):
            # Some datastores have auto but can't do in place transfer
            if mode == "auto" and "auto" in self.ingestTransferModes and not self.canIngestNoTransferAuto:
                continue

            with self.subTest(mode=mode):
                datastore = self.makeDatastore()

                def succeed(obj, path, ref):
                    """Ingest a file already in the datastore root."""
                    # first move it into the root, and adjust the path
                    # accordingly
                    path = shutil.copy(path, datastore.root.ospath)
                    path = os.path.relpath(path, start=datastore.root.ospath)
                    datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        datastore.ingest(
                            FileDataset(path="this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref))

                def failOutsideRoot(obj, path, ref):
                    """Can't ingest files outside of datastore root unless
                    using "auto" mode."""
                    if mode == "auto":
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertTrue(datastore.exists(ref))
                    else:
                        with self.assertRaises(RuntimeError):
                            datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                        self.assertFalse(datastore.exists(ref))

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=path, refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failOutsideRoot)
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed)
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestTransfer(self):
        """Test ingesting existing files after transferring them."""
        for mode in ("copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto"):
            with self.subTest(mode=mode):
                datastore = self.makeDatastore(mode)

                def succeed(obj, path, ref):
                    """Ingest a file by transferring it to the template
                    location."""
                    datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)
                    self.assertEqual(obj, datastore.get(ref))

                def failInputDoesNotExist(obj, path, ref):
                    """Can't ingest files if we're given a bad path."""
                    with self.assertRaises(FileNotFoundError):
                        # Ensure the file does not look like it is in
                        # datastore for auto mode
                        datastore.ingest(
                            FileDataset(path="../this-file-does-not-exist.yaml", refs=ref), transfer=mode
                        )
                    self.assertFalse(datastore.exists(ref), f"Checking not in datastore using mode {mode}")

                def failNotImplemented(obj, path, ref):
                    with self.assertRaises(NotImplementedError):
                        datastore.ingest(FileDataset(path=os.path.abspath(path), refs=ref), transfer=mode)

                if mode in self.ingestTransferModes:
                    self.runIngestTest(failInputDoesNotExist)
                    self.runIngestTest(succeed, expectOutput=(mode != "move"))
                else:
                    self.runIngestTest(failNotImplemented)

    def testIngestSymlinkOfSymlink(self):
        """Special test for ingesting a symlink to a symlink."""
        metrics, ref = self._prepareIngestTest()
        # The aim of this test is to create a dataset on disk, then
        # create a symlink to it and finally ingest the symlink such that
        # the symlink in the datastore points to the original dataset.
        for mode in ("symlink", "relsymlink"):
            if mode not in self.ingestTransferModes:
                continue

            print(f"Trying mode {mode}")
            with lsst.utils.tests.getTempFilePath(".yaml") as realpath:
                with open(realpath, "w") as fd:
                    yaml.dump(metrics._asdict(), stream=fd)
                with lsst.utils.tests.getTempFilePath(".yaml") as sympath:
                    os.symlink(os.path.abspath(realpath), sympath)

                    datastore = self.makeDatastore()
                    datastore.ingest(FileDataset(path=os.path.abspath(sympath), refs=ref), transfer=mode)

                    uri = datastore.getURI(ref)
                    self.assertTrue(uri.isLocal, f"Check {uri.scheme}")
                    self.assertTrue(os.path.islink(uri.ospath), f"Check {uri} is a symlink")

                    linkTarget = os.readlink(uri.ospath)
                    if mode == "relsymlink":
                        self.assertFalse(os.path.isabs(linkTarget))
                    else:
                        self.assertEqual(linkTarget, os.path.abspath(realpath))

                    # Check that we can get the dataset back regardless of mode
                    metric2 = datastore.get(ref)
                    self.assertEqual(metric2, metrics)

                    # Clean up the file for the next time around the loop
                    # since it will get the same file name in the store
                    datastore.remove(ref)

    def _populate_export_datastore(self, name: str) -> tuple[Datastore, list[DatasetRef]]:
        datastore = self.makeDatastore(name)

        # For now only the FileDatastore can be used for this test.
        # A ChainedDatastore that only includes InMemoryDatastores has to be
        # skipped as well.
        for datastore_name in datastore.names:
            if not datastore_name.startswith("InMemoryDatastore"):
                break
        else:
            raise unittest.SkipTest("in-memory datastore does not support record export/import")

        metrics = makeExampleMetrics()
        dimensions = self.universe.extract(("visit", "physical_filter"))
        sc = self.storageClassFactory.getStorageClass("StructuredData")

        refs = []
        for visit in (2048, 2049, 2050):
            dataId = FakeDataCoordinate.from_dict(
                {"instrument": "dummy", "visit": visit, "physical_filter": "Uprime"}
            )
            ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
            datastore.put(metrics, ref)
            refs.append(ref)
        return datastore, refs

    def testExportImportRecords(self):
        """Test for export_records and import_records methods."""
        datastore, refs = self._populate_export_datastore("test_datastore")
        for exported_refs in (refs, refs[1:]):
            n_refs = len(exported_refs)
            records = datastore.export_records(exported_refs)
            self.assertGreater(len(records), 0)
            self.assertTrue(set(records.keys()) <= set(datastore.names))
            # In a ChainedDatastore each FileDatastore will have a complete set
            for datastore_name in records:
                record_data = records[datastore_name]
                self.assertEqual(len(record_data.records), n_refs)

                # Check that subsetting works, include non-existing dataset ID.
                dataset_ids = {exported_refs[0].id, uuid.uuid4()}
                subset = record_data.subset(dataset_ids)
                assert subset is not None
                self.assertEqual(len(subset.records), 1)
                subset = record_data.subset({uuid.uuid4()})
                self.assertIsNone(subset)

        # Use the same datastore name to import relative path.
        datastore2 = self.makeDatastore("test_datastore")

        records = datastore.export_records(refs[1:])
        datastore2.import_records(records)

        with self.assertRaises(FileNotFoundError):
            data = datastore2.get(refs[0])
        data = datastore2.get(refs[1])
        self.assertIsNotNone(data)
        data = datastore2.get(refs[2])
        self.assertIsNotNone(data)

    def testExport(self):
        datastore, refs = self._populate_export_datastore("test_datastore")

        datasets = list(datastore.export(refs))
        self.assertEqual(len(datasets), 3)

        for transfer in (None, "auto"):
            # Both will default to None
            datasets = list(datastore.export(refs, transfer=transfer))
            self.assertEqual(len(datasets), 3)

        with self.assertRaises(TypeError):
            list(datastore.export(refs, transfer="copy"))

        with self.assertRaises(TypeError):
            list(datastore.export(refs, directory="exportDir", transfer="move"))

        # Create a new ref that is not known to the datastore and try to
        # export it.
        sc = self.storageClassFactory.getStorageClass("ThingOne")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, sc, dataId, conform=False)
        with self.assertRaises(FileNotFoundError):
            list(datastore.export(refs + [ref], transfer=None))

    def test_pydantic_dict_storage_class_conversions(self):
        """Test converting a dataset stored as a pydantic model into a dict on
        read.
        """
        datastore = self.makeDatastore()
        store_as_model = self.makeDatasetRef(
            "store_as_model",
            dimensions=self.universe.empty,
            storageClass="DictConvertibleModel",
            dataId=DataCoordinate.makeEmpty(self.universe),
        )
        content = {"a": "one", "b": "two"}
        model = DictConvertibleModel.from_dict(content, extra="original content")
        datastore.put(model, store_as_model)
        retrieved_model = datastore.get(store_as_model)
        self.assertEqual(retrieved_model, model)
        loaded = datastore.get(store_as_model.overrideStorageClass("NativeDictForConvertibleModel"))
        self.assertEqual(type(loaded), dict)
        self.assertEqual(loaded, content)

    def test_simple_class_put_get(self):
        """Test that we can put and get a simple class with a dict()
        constructor."""
        datastore = self.makeDatastore()
        data = MetricsExample(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExample", data)

    def test_dataclass_put_get(self):
        """Test that we can put and get a simple dataclass."""
        datastore = self.makeDatastore()
        data = MetricsExampleDataclass(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleDataclass", data)

    def test_pydantic_put_get(self):
        """Test that we can put and get a simple Pydantic model."""
        datastore = self.makeDatastore()
        data = MetricsExampleModel(summary={"a": 1}, data=[1, 2, 3], output={"b": 2})
        self._assert_different_puts(datastore, "MetricsExampleModel", data)

    def test_tuple_put_get(self):
        """Test that we can put and get a tuple."""
        datastore = self.makeDatastore()
        data = ("a", "b", 1)
        self._assert_different_puts(datastore, "TupleExample", data)

    def _assert_different_puts(self, datastore: Datastore, storageClass_root: str, data) -> None:
        refs = {
            x: self.makeDatasetRef(
                f"store_as_{x}",
                dimensions=self.universe.empty,
                storageClass=f"{storageClass_root}{x}",
                dataId=DataCoordinate.makeEmpty(self.universe),
            )
            for x in ["A", "B"]
        }

        for ref in refs.values():
            datastore.put(data, ref)

        self.assertEqual(datastore.get(refs["A"]), datastore.get(refs["B"]))


class PosixDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")
    uriScheme = "file"
    canIngestNoTransferAuto = True
    ingestTransferModes = (None, "copy", "move", "link", "hardlink", "symlink", "relsymlink", "auto")
    isEphemeral = False
    rootKeys = ("root",)
    validationCanFail = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testAtomicWrite(self):
        """Test that we write to a temporary file and then rename it."""
        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        with self.assertLogs("lsst.resources", "DEBUG") as cm:
            datastore.put(metrics, ref)
        move_logs = [ll for ll in cm.output if "transfer=" in ll]
        self.assertIn("transfer=move", move_logs[0])

        # And the transfer should be file to file.
        self.assertEqual(move_logs[0].count("file://"), 2)

    def testCanNotDeterminePutFormatterLocation(self):
        """Verify that the expected exception is raised if the FileDatastore
        cannot determine the put formatter location."""

        _ = makeExampleMetrics()
        datastore = self.makeDatastore()

        # Create a simple storage class and a composite for testing both
        # formulations.
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        sccomp = StorageClass("Dummy")
        compositeStorageClass = StorageClass(
            "StructuredComposite", components={"dummy": sccomp, "dummy2": sccomp}
        )

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)
        compRef = self.makeDatasetRef("metric", dimensions, compositeStorageClass, dataId, conform=False)

        def raiser(ref):
            raise DatasetTypeNotSupportedError()

        with unittest.mock.patch.object(
            lsst.daf.butler.datastores.fileDatastore.FileDatastore,
            "_determine_put_formatter_location",
            side_effect=raiser,
        ):
            # verify the non-composite ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(ref, predict=True)

            # verify the composite-ref execution path:
            with self.assertRaises(DatasetTypeNotSupportedError):
                datastore.getURIs(compRef, predict=True)


class PosixDatastoreNoChecksumsTestCase(PosixDatastoreTestCase):
    """Posix datastore tests but with checksums disabled."""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreNoChecksums.yaml")

    def testChecksum(self):
        """Ensure that checksums have not been calculated."""

        datastore = self.makeDatastore()
        storageClass = self.storageClassFactory.getStorageClass("StructuredData")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        metrics = makeExampleMetrics()

        dataId = DataIdForTest({"instrument": "dummy", "visit": 0, "physical_filter": "V"})
        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Configuration should have disabled checksum calculation
        datastore.put(metrics, ref)
        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNone(infos[0].checksum)

        # Remove it, then put it back with checksums explicitly enabled
        datastore.remove(ref)
        datastore.useChecksum = True
        datastore.put(metrics, ref)

        infos = datastore.getStoredItemsInfo(ref)
        self.assertIsNotNone(infos[0].checksum)


class TrashDatastoreTestCase(PosixDatastoreTestCase):
    """Restrict trash test to FileDatastore."""

    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def testTrash(self):
        datastore, *refs = self.prepDeleteTest(n_refs=10)

        # Trash one of them.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore.trash(ref)
        self.assertTrue(uri.exists(), uri)  # Not deleted yet
        datastore.emptyTrash()
        self.assertFalse(uri.exists(), uri)

        # Trashing it again should be fine.
        datastore.trash(ref)

        # Trash multiple items at once.
        subset = [refs.pop(), refs.pop()]
        datastore.trash(subset)
        datastore.emptyTrash()

        # Remove a record and trash should do nothing.
        # This is the execution butler scenario.
        ref = refs.pop()
        uri = datastore.getURI(ref)
        datastore._table.delete(["dataset_id"], {"dataset_id": ref.id})
        self.assertTrue(uri.exists())
        datastore.trash(ref)
        datastore.emptyTrash()
        self.assertTrue(uri.exists())

        # Switch on trust and it should delete the file.
        datastore.trustGetRequest = True
        datastore.trash([ref])
        self.assertFalse(uri.exists())

        # Remove multiples at once in trust mode.
        subset = [refs.pop() for _ in range(3)]
        datastore.trash(subset)
        datastore.trash(refs.pop())  # Check that a single ref can be trashed


class CleanupPosixDatastoreTestCase(DatastoreTestsBase, unittest.TestCase):
    configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testCleanup(self):
        """Test that a failed formatter write cleans up a partial file."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        storageClass = self.storageClassFactory.getStorageClass("StructuredData")

        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        ref = self.makeDatasetRef("metric", dimensions, storageClass, dataId, conform=False)

        # Determine where the file will end up (we assume Formatters use
        # the same file extension)
        expectedUri = datastore.getURI(ref, predict=True)
        self.assertEqual(expectedUri.fragment, "predicted")

        self.assertEqual(expectedUri.getExtension(), ".yaml", f"Is there a file extension in {expectedUri}")

        # Try a formatter that fails without writing and a formatter that
        # fails after leaving a partial file behind.
        for formatter in (BadWriteFormatter, BadNoWriteFormatter):
            with self.subTest(formatter=formatter):
                # Monkey patch the formatter
                datastore.formatterFactory.registerFormatter(ref.datasetType, formatter, overwrite=True)

                # Try to put the dataset, it should fail
                with self.assertRaises(Exception):
                    datastore.put(metrics, ref)

                # Check that there is no file on disk
                self.assertFalse(expectedUri.exists(), f"Check for existence of {expectedUri}")

                # Check that there is a directory
                dir = expectedUri.dirname()
                self.assertTrue(dir.exists(), f"Check for existence of directory {dir}")

        # Force YamlFormatter and check that this time a file is written
        datastore.formatterFactory.registerFormatter(ref.datasetType, YamlFormatter, overwrite=True)
        datastore.put(metrics, ref)
        self.assertTrue(expectedUri.exists(), f"Check for existence of {expectedUri}")
        datastore.remove(ref)
        self.assertFalse(expectedUri.exists(), f"Check for existence of now removed {expectedUri}")


class InMemoryDatastoreTestCase(DatastoreTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastore.yaml")
    uriScheme = "mem"
    hasUnsupportedPut = False
    ingestTransferModes = ()
    isEphemeral = True
    rootKeys = None
    validationCanFail = False


class ChainedDatastoreTestCase(PosixDatastoreTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore.yaml")
    hasUnsupportedPut = False
    canIngestNoTransferAuto = False
    ingestTransferModes = ("copy", "move", "hardlink", "symlink", "relsymlink", "link", "auto")
    isEphemeral = False
    rootKeys = (".datastores.1.root", ".datastores.2.root")
    validationCanFail = True


class ChainedDatastoreMemoryTestCase(InMemoryDatastoreTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2.yaml")
    validationCanFail = False


class DatastoreConstraintsTests(DatastoreTestsBase):
    """Basic tests of constraints model of Datastores."""

    def testConstraints(self):
        """Test constraints model.  Assumes that each test class has the
        same constraints."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId = DataIdForTest({"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"})

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")
        for datasetTypeName, sc, accepted in (
            ("metric", sc1, True),
            ("metric5", sc1, False),
            ("metric33", sc1, True),
            ("metric5", sc2, True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=datasetTypeName, storageClass=sc.name, file=testfile.name):
                ref = self.makeDatasetRef(datasetTypeName, dimensions, sc, dataId, conform=False)
                if accepted:
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))
                    datastore.remove(ref)

                    # Try ingest
                    if self.canIngest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))
                        datastore.remove(ref)
                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    if self.canIngest:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertFalse(datastore.exists(ref))


class PosixDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """PosixDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/posixDatastoreP.yaml")
    canIngest = True

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()


class InMemoryDatastoreConstraintsTestCase(DatastoreConstraintsTests, unittest.TestCase):
    """InMemoryDatastore specialization"""

    configFile = os.path.join(TESTDIR, "config/basic/inMemoryDatastoreP.yaml")
    canIngest = False


class ChainedDatastoreConstraintsNativeTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore and constraints
    at the ChainedDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePa.yaml")


class ChainedDatastoreConstraintsTestCase(PosixDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using a POSIXDatastore"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastoreP.yaml")


class ChainedDatastoreMemoryConstraintsTestCase(InMemoryDatastoreConstraintsTestCase):
    """ChainedDatastore specialization using all InMemoryDatastores"""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastore2P.yaml")
    canIngest = False


class ChainedDatastorePerStoreConstraintsTests(DatastoreTestsBase, unittest.TestCase):
    """Test that a chained datastore can control constraints per-datastore
    even if a child datastore would accept the dataset."""

    configFile = os.path.join(TESTDIR, "config/basic/chainedDatastorePb.yaml")

    def setUp(self):
        # Override the working directory before calling the base class
        self.root = tempfile.mkdtemp(dir=TESTDIR)
        super().setUp()

    def testConstraints(self):
        """Test chained datastore constraints model."""
        metrics = makeExampleMetrics()
        datastore = self.makeDatastore()

        sc1 = self.storageClassFactory.getStorageClass("StructuredData")
        sc2 = self.storageClassFactory.getStorageClass("StructuredDataJson")
        dimensions = self.universe.extract(("visit", "physical_filter", "instrument"))
        dataId1 = {"visit": 52, "physical_filter": "V", "instrument": "DummyCamComp"}
        dataId2 = {"visit": 52, "physical_filter": "V", "instrument": "HSC"}

        # Write empty file suitable for ingest check (JSON and YAML variants)
        testfile_y = tempfile.NamedTemporaryFile(suffix=".yaml")
        testfile_j = tempfile.NamedTemporaryFile(suffix=".json")

        for typeName, dataId, sc, accept, ingest in (
            ("metric", dataId1, sc1, (False, True, False), True),
            ("metric5", dataId1, sc1, (False, False, False), False),
            ("metric5", dataId2, sc1, (True, False, False), False),
            ("metric33", dataId2, sc2, (True, True, False), True),
            ("metric5", dataId1, sc2, (False, True, False), True),
        ):
            # Choose different temp file depending on StorageClass
            testfile = testfile_j if sc.name.endswith("Json") else testfile_y

            with self.subTest(datasetTypeName=typeName, dataId=dataId, sc=sc.name):
                ref = self.makeDatasetRef(typeName, dimensions, sc, dataId, conform=False)
                if any(accept):
                    datastore.put(metrics, ref)
                    self.assertTrue(datastore.exists(ref))

                    # Check each datastore inside the chained datastore
                    for childDatastore, expected in zip(datastore.datastores, accept):
                        self.assertEqual(
                            childDatastore.exists(ref),
                            expected,
                            f"Testing presence of {ref} in datastore {childDatastore.name}",
                        )

                    datastore.remove(ref)

                    # Check that ingest works
                    if ingest:
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                        self.assertTrue(datastore.exists(ref))

                        # Check each datastore inside the chained datastore
                        for childDatastore, expected in zip(datastore.datastores, accept):
                            # Ephemeral datastores currently means
                            # InMemoryDatastore, which does not accept
                            # ingest of files.
                            if childDatastore.isEphemeral:
                                expected = False
                            self.assertEqual(
                                childDatastore.exists(ref),
                                expected,
                                f"Testing presence of ingested {ref} in datastore {childDatastore.name}",
                            )

                        datastore.remove(ref)
                    else:
                        with self.assertRaises(DatasetTypeNotSupportedError):
                            datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")

                else:
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.put(metrics, ref)
                    self.assertFalse(datastore.exists(ref))

                    # Again with ingest
                    with self.assertRaises(DatasetTypeNotSupportedError):
                        datastore.ingest(FileDataset(testfile.name, [ref]), transfer="link")
                    self.assertFalse(datastore.exists(ref))

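# The cache tests below drive DatastoreCacheManager with small YAML fragments.
# The shape exercised in this file is (annotations are our reading of the
# tests, not authoritative documentation):
#
#     cached:
#       root: null        # null selects a temporary cache directory
#       default: true     # whether dataset types not listed below are cached
#       cacheable:
#         metric0: true   # per-dataset-type overrides
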

class DatastoreCacheTestCase(DatasetTestHelper, unittest.TestCase):
    """Tests for datastore caching infrastructure."""

    @classmethod
    def setUpClass(cls):
        cls.storageClassFactory = StorageClassFactory()
        cls.universe = DimensionUniverse()

        # Ensure that we load the test storage class definitions.
        scConfigFile = os.path.join(TESTDIR, "config/basic/storageClasses.yaml")
        cls.storageClassFactory.addFromConfig(scConfigFile)

    def setUp(self):
        self.id = 0

        # Create a root that we can use for caching tests.
        self.root = tempfile.mkdtemp(dir=TESTDIR)

        # Create some test dataset refs and associated test files
        sc = self.storageClassFactory.getStorageClass("StructuredDataDict")
        dimensions = self.universe.extract(("visit", "physical_filter"))
        dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"})

        # Create list of refs and list of temporary files
        n_datasets = 10
        self.refs = [
            self.makeDatasetRef(f"metric{n}", dimensions, sc, dataId, conform=False)
            for n in range(n_datasets)
        ]

        root_uri = ResourcePath(self.root, forceDirectory=True)
        self.files = [root_uri.join(f"file{n}.txt") for n in range(n_datasets)]

        # Create test files.
        for uri in self.files:
            uri.write(b"0123456789")

        # Create some composite refs with component files.
        sc = self.storageClassFactory.getStorageClass("StructuredData")
        self.composite_refs = [
            self.makeDatasetRef(f"composite{n}", dimensions, sc, dataId, conform=False) for n in range(3)
        ]
        self.comp_files = []
        self.comp_refs = []
        for n, ref in enumerate(self.composite_refs):
            component_refs = []
            component_files = []
            for component in sc.components:
                component_ref = ref.makeComponentRef(component)
                file = root_uri.join(f"composite_file-{n}-{component}.txt")
                component_refs.append(component_ref)
                component_files.append(file)
                file.write(b"9876543210")

            self.comp_files.append(component_files)
            self.comp_refs.append(component_refs)

    def tearDown(self):
        if self.root is not None and os.path.exists(self.root):
            shutil.rmtree(self.root, ignore_errors=True)

    def _make_cache_manager(self, config_str: str) -> DatastoreCacheManager:
        config = Config.fromYaml(config_str)
        return DatastoreCacheManager(DatastoreCacheManagerConfig(config), universe=self.universe)

1489 def testNoCacheDir(self): 

1490 config_str = """ 

1491cached: 

1492 root: null 

1493 cacheable: 

1494 metric0: true 

1495 """ 

1496 cache_manager = self._make_cache_manager(config_str) 

1497 

1498 # Look inside to check we don't have a cache directory 

1499 self.assertIsNone(cache_manager._cache_directory) 

1500 

1501 self.assertCache(cache_manager) 

1502 

1503 # Test that the cache directory is marked temporary 

1504 self.assertTrue(cache_manager.cache_directory.isTemporary) 

1505 

1506 def testNoCacheDirReversed(self): 

1507        """Use the default caching status and set metric1 to false.""" 

1508 config_str = """ 

1509cached: 

1510 root: null 

1511 default: true 

1512 cacheable: 

1513 metric1: false 

1514 """ 

1515 cache_manager = self._make_cache_manager(config_str) 

1516 

1517 self.assertCache(cache_manager) 

1518 
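    # Illustrative sketch of what the two configurations above imply for the
    # opt-in logic, using the refs created in setUp() (metric0, metric1, ...)
    # and the behaviour asserted by assertCache():
    #
    #   cacheable: {metric0: true}                  -> only metric0 is cached
    #   default: true, cacheable: {metric1: false}  -> everything except metric1
    #
    #   cache_manager.should_be_cached(self.refs[0])  # metric0 -> True
    #   cache_manager.should_be_cached(self.refs[1])  # metric1 -> False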

1519 def testEnvvarCacheDir(self): 

1520 config_str = f""" 

1521cached: 

1522 root: '{self.root}' 

1523 cacheable: 

1524 metric0: true 

1525 """ 

1526 

1527 root = ResourcePath(self.root, forceDirectory=True) 

1528 env_dir = root.join("somewhere", forceDirectory=True) 

1529 elsewhere = root.join("elsewhere", forceDirectory=True) 

1530 

1531 # Environment variable should override the config value. 

1532 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath}): 

1533 cache_manager = self._make_cache_manager(config_str) 

1534 self.assertEqual(cache_manager.cache_directory, env_dir) 

1535 

1536 # This environment variable should not override the config value. 

1537 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1538 cache_manager = self._make_cache_manager(config_str) 

1539 self.assertEqual(cache_manager.cache_directory, root) 

1540 

1541        # Now switch to a config with no cache directory set. 

1542 config_str = """ 

1543cached: 

1544 root: null 

1545 default: true 

1546 cacheable: 

1547 metric1: false 

1548 """ 

1549 cache_manager = self._make_cache_manager(config_str) 

1550 

1551 # This environment variable should override the config value. 

1552 with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": env_dir.ospath}): 

1553 cache_manager = self._make_cache_manager(config_str) 

1554 self.assertEqual(cache_manager.cache_directory, env_dir) 

1555 

1556 # If both environment variables are set the main (not IF_UNSET) 

1557 # variable should win. 

1558 with unittest.mock.patch.dict( 

1559 os.environ, 

1560 { 

1561 "DAF_BUTLER_CACHE_DIRECTORY": env_dir.ospath, 

1562 "DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": elsewhere.ospath, 

1563 }, 

1564 ): 

1565 cache_manager = self._make_cache_manager(config_str) 

1566 self.assertEqual(cache_manager.cache_directory, env_dir) 

1567 

1568 # Use the API to set the environment variable, making sure that the 

1569 # variable is reset on exit. 

1570 with unittest.mock.patch.dict( 

1571 os.environ, 

1572 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1573 ): 

1574 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1575 self.assertTrue(defined) 

1576 cache_manager = self._make_cache_manager(config_str) 

1577 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1578 

1579 # Now create the cache manager ahead of time and set the fallback 

1580 # later. 

1581 cache_manager = self._make_cache_manager(config_str) 

1582 self.assertIsNone(cache_manager._cache_directory) 

1583 with unittest.mock.patch.dict( 

1584 os.environ, 

1585 {"DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET": ""}, 

1586 ): 

1587 defined, cache_dir = DatastoreCacheManager.set_fallback_cache_directory_if_unset() 

1588 self.assertTrue(defined) 

1589 self.assertEqual(cache_manager.cache_directory, ResourcePath(cache_dir, forceDirectory=True)) 

1590 
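    # Summary sketch of the cache-directory precedence exercised by this
    # test, from highest to lowest priority (as asserted above, not an
    # authoritative specification):
    #
    #   1. DAF_BUTLER_CACHE_DIRECTORY             (always wins)
    #   2. an explicit 'root' in the configuration
    #   3. DAF_BUTLER_CACHE_DIRECTORY_IF_UNSET    (used only when root is null)
    #   4. a temporary directory created on demand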

1591 def testExplicitCacheDir(self): 

1592 config_str = f""" 

1593cached: 

1594 root: '{self.root}' 

1595 cacheable: 

1596 metric0: true 

1597 """ 

1598 cache_manager = self._make_cache_manager(config_str) 

1599 

1600 # Look inside to check we do have a cache directory. 

1601 self.assertEqual(cache_manager.cache_directory, ResourcePath(self.root, forceDirectory=True)) 

1602 

1603 self.assertCache(cache_manager) 

1604 

1605 # Test that the cache directory is not marked temporary 

1606 self.assertFalse(cache_manager.cache_directory.isTemporary) 

1607 

1608 def assertCache(self, cache_manager): 

1609 self.assertTrue(cache_manager.should_be_cached(self.refs[0])) 

1610 self.assertFalse(cache_manager.should_be_cached(self.refs[1])) 

1611 

1612 uri = cache_manager.move_to_cache(self.files[0], self.refs[0]) 

1613 self.assertIsInstance(uri, ResourcePath) 

1614 self.assertIsNone(cache_manager.move_to_cache(self.files[1], self.refs[1])) 

1615 

1616 # Check presence in cache using ref and then using file extension. 

1617 self.assertFalse(cache_manager.known_to_cache(self.refs[1])) 

1618 self.assertTrue(cache_manager.known_to_cache(self.refs[0])) 

1619 self.assertFalse(cache_manager.known_to_cache(self.refs[1], self.files[1].getExtension())) 

1620 self.assertTrue(cache_manager.known_to_cache(self.refs[0], self.files[0].getExtension())) 

1621 

1622 # Cached file should no longer exist but uncached file should be 

1623 # unaffected. 

1624 self.assertFalse(self.files[0].exists()) 

1625 self.assertTrue(self.files[1].exists()) 

1626 

1627 # Should find this file and it should be within the cache directory. 

1628 with cache_manager.find_in_cache(self.refs[0], ".txt") as found: 

1629 self.assertTrue(found.exists()) 

1630 self.assertIsNotNone(found.relative_to(cache_manager.cache_directory)) 

1631 

1632 # Should not be able to find these in cache 

1633 with cache_manager.find_in_cache(self.refs[0], ".fits") as found: 

1634 self.assertIsNone(found) 

1635 with cache_manager.find_in_cache(self.refs[1], ".fits") as found: 

1636 self.assertIsNone(found) 

1637 

1638 def testNoCache(self): 

1639 cache_manager = DatastoreDisabledCacheManager("", universe=self.universe) 

1640 for uri, ref in zip(self.files, self.refs): 

1641 self.assertFalse(cache_manager.should_be_cached(ref)) 

1642 self.assertIsNone(cache_manager.move_to_cache(uri, ref)) 

1643 self.assertFalse(cache_manager.known_to_cache(ref)) 

1644 with cache_manager.find_in_cache(ref, ".txt") as found: 

1645 self.assertIsNone(found, msg=f"{cache_manager}") 

1646 

1647 def _expiration_config(self, mode: str, threshold: int) -> str: 

1648 return f""" 

1649cached: 

1650 default: true 

1651 expiry: 

1652 mode: {mode} 

1653 threshold: {threshold} 

1654 cacheable: 

1655 unused: true 

1656 """ 

1657 
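    # For reference, the YAML produced by _expiration_config("files", 2)
    # looks like this (direct substitution into the f-string above):
    #
    #   cached:
    #     default: true
    #     expiry:
    #       mode: files
    #       threshold: 2
    #     cacheable:
    #       unused: true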

1658 def testCacheExpiryFiles(self): 

1659 threshold = 2 # Keep at least 2 files. 

1660 mode = "files" 

1661 config_str = self._expiration_config(mode, threshold) 

1662 

1663 cache_manager = self._make_cache_manager(config_str) 

1664 

1665 # Check that an empty cache returns unknown for arbitrary ref 

1666 self.assertFalse(cache_manager.known_to_cache(self.refs[0])) 

1667 

1668 # Should end with datasets: 2, 3, 4 

1669 self.assertExpiration(cache_manager, 5, threshold + 1) 

1670 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1671 

1672 # Check that we will not expire a file that is actively in use. 

1673 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1674 self.assertIsNotNone(found) 

1675 

1676 # Trigger cache expiration that should remove the file 

1677 # we just retrieved. Should now have: 3, 4, 5 

1678 cached = cache_manager.move_to_cache(self.files[5], self.refs[5]) 

1679 self.assertIsNotNone(cached) 

1680 

1681            # Cache should still report the expected file count. 

1682 self.assertEqual(cache_manager.file_count, threshold + 1) 

1683 

1684 # Add additional entry to cache. 

1685 # Should now have 4, 5, 6 

1686 cached = cache_manager.move_to_cache(self.files[6], self.refs[6]) 

1687 self.assertIsNotNone(cached) 

1688 

1689 # Is the file still there? 

1690 self.assertTrue(found.exists()) 

1691 

1692 # Can we read it? 

1693 data = found.read() 

1694 self.assertGreater(len(data), 0) 

1695 

1696 # Outside context the file should no longer exist. 

1697 self.assertFalse(found.exists()) 

1698 

1699 # File count should not have changed. 

1700 self.assertEqual(cache_manager.file_count, threshold + 1) 

1701 

1702        # Dataset 2 was in the exempt directory, but because hardlinks 

1703        # are used it was deleted from the main cache during the cache 

1704        # expiry above, so it should no longer be found. 

1705 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1706 self.assertIsNone(found) 

1707 

1708 # And the one stored after it is also gone. 

1709 with cache_manager.find_in_cache(self.refs[3], ".txt") as found: 

1710 self.assertIsNone(found) 

1711 

1712 # But dataset 4 is present. 

1713 with cache_manager.find_in_cache(self.refs[4], ".txt") as found: 

1714 self.assertIsNotNone(found) 

1715 

1716        # Adding a new dataset should trigger expiry again; dataset 2 must remain absent. 

1717 cache_manager.move_to_cache(self.files[7], self.refs[7]) 

1718 

1719 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1720 self.assertIsNone(found) 

1721 

1722 def testCacheExpiryDatasets(self): 

1723 threshold = 2 # Keep 2 datasets. 

1724 mode = "datasets" 

1725 config_str = self._expiration_config(mode, threshold) 

1726 

1727 cache_manager = self._make_cache_manager(config_str) 

1728 self.assertExpiration(cache_manager, 5, threshold + 1) 

1729 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1730 

1731 def testCacheExpiryDatasetsComposite(self): 

1732 threshold = 2 # Keep 2 datasets. 

1733 mode = "datasets" 

1734 config_str = self._expiration_config(mode, threshold) 

1735 

1736 cache_manager = self._make_cache_manager(config_str) 

1737 

1738 n_datasets = 3 

1739 for i in range(n_datasets): 

1740 for component_file, component_ref in zip(self.comp_files[i], self.comp_refs[i]): 

1741 cached = cache_manager.move_to_cache(component_file, component_ref) 

1742 self.assertIsNotNone(cached) 

1743 self.assertTrue(cache_manager.known_to_cache(component_ref)) 

1744 self.assertTrue(cache_manager.known_to_cache(component_ref.makeCompositeRef())) 

1745 self.assertTrue(cache_manager.known_to_cache(component_ref, component_file.getExtension())) 

1746 

1747 self.assertEqual(cache_manager.file_count, 6) # 2 datasets each of 3 files 

1748 

1749        # Write two new non-composite datasets and the file count should drop. 

1750 self.assertExpiration(cache_manager, 2, 5) 

1751 

1752 def testCacheExpirySize(self): 

1753 threshold = 55 # Each file is 10 bytes 

1754 mode = "size" 

1755 config_str = self._expiration_config(mode, threshold) 

1756 

1757 cache_manager = self._make_cache_manager(config_str) 

1758 self.assertExpiration(cache_manager, 10, 6) 

1759 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1760 

1761 def assertExpiration(self, cache_manager, n_datasets, n_retained): 

1762 """Insert the datasets and then check the number retained.""" 

1763 for i in range(n_datasets): 

1764 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1765 self.assertIsNotNone(cached) 

1766 

1767 self.assertEqual(cache_manager.file_count, n_retained) 

1768 

1769        # The oldest files should no longer be in the cache. 

1770 for i in range(n_datasets): 

1771 with cache_manager.find_in_cache(self.refs[i], ".txt") as found: 

1772 if i >= n_datasets - n_retained: 

1773 self.assertIsInstance(found, ResourcePath) 

1774 else: 

1775 self.assertIsNone(found) 

1776 
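    # Worked example of the retention check above: with n_datasets=5 and
    # n_retained=3 (the "files" mode test), refs 0 and 1 satisfy
    # i < n_datasets - n_retained and so have been expired, while refs 2, 3
    # and 4 survive -- matching "Should end with datasets: 2, 3, 4" in
    # testCacheExpiryFiles.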

1777 def testCacheExpiryAge(self): 

1778        threshold = 1 # Expire files older than 1 second 

1779 mode = "age" 

1780 config_str = self._expiration_config(mode, threshold) 

1781 

1782 cache_manager = self._make_cache_manager(config_str) 

1783 self.assertIn(f"{mode}={threshold}", str(cache_manager)) 

1784 

1785        # Insert 2 files, then sleep, then insert 4 more. 

1786 for i in range(2): 

1787 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1788 self.assertIsNotNone(cached) 

1789 time.sleep(2.0) 

1790 for j in range(4): 

1791 i = 2 + j # Continue the counting 

1792 cached = cache_manager.move_to_cache(self.files[i], self.refs[i]) 

1793 self.assertIsNotNone(cached) 

1794 

1795 # Only the files written after the sleep should exist. 

1796 self.assertEqual(cache_manager.file_count, 4) 

1797 with cache_manager.find_in_cache(self.refs[1], ".txt") as found: 

1798 self.assertIsNone(found) 

1799 with cache_manager.find_in_cache(self.refs[2], ".txt") as found: 

1800 self.assertIsInstance(found, ResourcePath) 

1801 

1802 

1803class DatasetRefURIsTestCase(unittest.TestCase): 

1804 """Tests for DatasetRefURIs.""" 

1805 

1806 def testSequenceAccess(self): 

1807 """Verify that DatasetRefURIs can be treated like a two-item tuple.""" 

1808 uris = DatasetRefURIs() 

1809 

1810 self.assertEqual(len(uris), 2) 

1811        self.assertIsNone(uris[0]) 

1812 self.assertEqual(uris[1], {}) 

1813 

1814 primaryURI = ResourcePath("1/2/3") 

1815 componentURI = ResourcePath("a/b/c") 

1816 

1817        # Affirm that DatasetRefURIs does not support MutableSequence functions, 

1818 with self.assertRaises(TypeError): 

1819 uris[0] = primaryURI 

1820 with self.assertRaises(TypeError): 

1821 uris[1] = {"foo": componentURI} 

1822 

1823 # but DatasetRefURIs can be set by property name: 

1824 uris.primaryURI = primaryURI 

1825 uris.componentURIs = {"foo": componentURI} 

1826 self.assertEqual(uris.primaryURI, primaryURI) 

1827 self.assertEqual(uris[0], primaryURI) 

1828 

1829 primary, components = uris 

1830 self.assertEqual(primary, primaryURI) 

1831 self.assertEqual(components, {"foo": componentURI}) 

1832 

1833 def testRepr(self): 

1834 """Verify __repr__ output.""" 

1835 uris = DatasetRefURIs(ResourcePath("/1/2/3"), {"comp": ResourcePath("/a/b/c")}) 

1836 self.assertEqual( 

1837 repr(uris), 

1838 'DatasetRefURIs(ResourcePath("file:///1/2/3"), {\'comp\': ResourcePath("file:///a/b/c")})', 

1839 ) 

1840 

1841 
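# A small usage sketch (not part of the tests): DatasetRefURIs unpacks like
# a two-item tuple while also exposing named properties, so both access
# styles below see the same values. The helper name is ours.
def _describe_uris(uris: DatasetRefURIs) -> str:
    primary, components = uris  # tuple-style unpacking
    assert primary == uris.primaryURI and components == uris.componentURIs
    return f"primary={primary}, components={sorted(components)}"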

1842class DataIdForTestTestCase(unittest.TestCase): 

1843 """Tests for the DataIdForTest class.""" 

1844 

1845 def testImmutable(self): 

1846 """Verify that an instance is immutable by default.""" 

1847 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1848 initial_hash = hash(dataId) 

1849 

1850 with self.assertRaises(RuntimeError): 

1851 dataId["instrument"] = "foo" 

1852 

1853 with self.assertRaises(RuntimeError): 

1854 del dataId["instrument"] 

1855 

1856 assert sys.version_info[0] == 3 

1857 if sys.version_info[1] >= 9: 

1858 with self.assertRaises(RuntimeError): 

1859 dataId |= dict(foo="bar") 

1860 

1861 with self.assertRaises(RuntimeError): 

1862 dataId.pop("instrument") 

1863 

1864 with self.assertRaises(RuntimeError): 

1865 dataId.popitem() 

1866 

1867 with self.assertRaises(RuntimeError): 

1868 dataId.update(dict(instrument="foo")) 

1869 

1870        # Verify that the hash value has not changed. 

1871 self.assertEqual(initial_hash, hash(dataId)) 

1872 

1873 def testMutable(self): 

1874 """Verify that an instance can be made mutable (unfrozen).""" 

1875 dataId = DataIdForTest({"instrument": "dummy", "visit": 52, "physical_filter": "V"}) 

1876 initial_hash = hash(dataId) 

1877 dataId.frozen = False 

1878 self.assertEqual(initial_hash, hash(dataId)) 

1879 

1880 dataId["instrument"] = "foo" 

1881 self.assertEqual(dataId["instrument"], "foo") 

1882 self.assertNotEqual(initial_hash, hash(dataId)) 

1883 initial_hash = hash(dataId) 

1884 

1885 del dataId["instrument"] 

1886        self.assertNotIn("instrument", dataId) 

1887 self.assertNotEqual(initial_hash, hash(dataId)) 

1888 initial_hash = hash(dataId) 

1889 

1890 assert sys.version_info[0] == 3 

1891 if sys.version_info[1] >= 9: 

1892 dataId |= dict(foo="bar") 

1893 self.assertEqual(dataId["foo"], "bar") 

1894 self.assertNotEqual(initial_hash, hash(dataId)) 

1895 initial_hash = hash(dataId) 

1896 

1897 dataId.pop("visit") 

1898        self.assertNotIn("visit", dataId) 

1899 self.assertNotEqual(initial_hash, hash(dataId)) 

1900 initial_hash = hash(dataId) 

1901 

1902 dataId.popitem() 

1903        self.assertNotIn("physical_filter", dataId) 

1904 self.assertNotEqual(initial_hash, hash(dataId)) 

1905 initial_hash = hash(dataId) 

1906 

1907 dataId.update(dict(instrument="foo")) 

1908 self.assertEqual(dataId["instrument"], "foo") 

1909 self.assertNotEqual(initial_hash, hash(dataId)) 

1910 initial_hash = hash(dataId) 

1911 

1912 
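# Usage sketch (not part of the tests): DataIdForTest instances are frozen on
# construction; flipping the `frozen` attribute permits mutation at the cost
# of changing the hash, as testMutable verifies. The helper name is ours.
def _mutate_data_id(data_id: DataIdForTest, **updates) -> int:
    data_id.frozen = False
    data_id.update(updates)  # Would raise RuntimeError while frozen.
    data_id.frozen = True
    return hash(data_id)  # The new hash reflects the mutated contents.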

1913class StoredFileInfoTestCase(DatasetTestHelper, unittest.TestCase): 

1914 storageClassFactory = StorageClassFactory() 

1915 

1916 def test_StoredFileInfo(self): 

1917 storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") 

1918 ref = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False) 

1919 

1920 record = dict( 

1921 storage_class="StructuredDataDict", 

1922 formatter="lsst.daf.butler.Formatter", 

1923 path="a/b/c.txt", 

1924 component="component", 

1925 dataset_id=ref.id, 

1926 checksum=None, 

1927 file_size=5, 

1928 ) 

1929 info = StoredFileInfo.from_record(record) 

1930 

1931 self.assertEqual(info.dataset_id, ref.id) 

1932 self.assertEqual(info.to_record(), record) 

1933 

1934 ref2 = self.makeDatasetRef("metric", DimensionUniverse().extract(()), storageClass, {}, conform=False) 

1935 rebased = info.rebase(ref2) 

1936 self.assertEqual(rebased.dataset_id, ref2.id) 

1937 self.assertEqual(rebased.rebase(ref), info) 

1938 

1939 with self.assertRaises(TypeError): 

1940 rebased.update(formatter=42) 

1941 

1942 with self.assertRaises(ValueError): 

1943 rebased.update(something=42, new="42") 

1944 

1945 # Check that pickle works on StoredFileInfo. 

1946 pickled_info = pickle.dumps(info) 

1947 unpickled_info = pickle.loads(pickled_info) 

1948 self.assertEqual(unpickled_info, info) 

1949 

1950 
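# Round-trip sketch (not part of the tests): for a well-formed record dict,
# StoredFileInfo.from_record and to_record are inverses, as asserted in
# test_StoredFileInfo above. The helper name is ours.
def _record_roundtrips(record: dict) -> bool:
    return StoredFileInfo.from_record(record).to_record() == record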

1951if __name__ == "__main__": 

1952 unittest.main()